From 90ff17b1d27572e9f8466bf3c9b3ffd201351440 Mon Sep 17 00:00:00 2001 From: Kamil-Lontkowski <kamillont14@gmail.com> Date: Thu, 9 Jan 2025 12:38:32 +0100 Subject: [PATCH 01/26] AtomicCircularBuffer and skeleton on CircuitBreaker --- .../ox/resilience/AtomicCircularBuffer.scala | 29 ++++ .../scala/ox/resilience/CircuitBreaker.scala | 133 ++++++++++++++++++ 2 files changed, 162 insertions(+) create mode 100644 core/src/main/scala/ox/resilience/AtomicCircularBuffer.scala create mode 100644 core/src/main/scala/ox/resilience/CircuitBreaker.scala diff --git a/core/src/main/scala/ox/resilience/AtomicCircularBuffer.scala b/core/src/main/scala/ox/resilience/AtomicCircularBuffer.scala new file mode 100644 index 00000000..40b975e9 --- /dev/null +++ b/core/src/main/scala/ox/resilience/AtomicCircularBuffer.scala @@ -0,0 +1,29 @@ +package ox.resilience + +import scala.reflect.ClassTag +import java.util.concurrent.Semaphore + +class AtomicCircularBuffer[T: ClassTag](size: Int): + private val semaphore = Semaphore(1) + private var writeIndex = 0 + private var readIndex = 0 + private val buffer = Array.fill[Option[T]](size)(None) + def push(item: T): Unit = + semaphore.acquire() + try + buffer(writeIndex) = Some(item) + writeIndex = (writeIndex + 1) % size + finally semaphore.release() + def pop: Option[T] = + semaphore.acquire() + try + val result = buffer(readIndex) + readIndex = (readIndex + 1) % size + result + finally semaphore.release() + def peak: Option[T] = buffer(readIndex) + def snapshot: Array[T] = + semaphore.acquire() + try buffer.clone().flatMap(identity) + finally semaphore.release() +end AtomicCircularBuffer diff --git a/core/src/main/scala/ox/resilience/CircuitBreaker.scala b/core/src/main/scala/ox/resilience/CircuitBreaker.scala new file mode 100644 index 00000000..d278899a --- /dev/null +++ b/core/src/main/scala/ox/resilience/CircuitBreaker.scala @@ -0,0 +1,133 @@ +package ox.resilience + +import scala.concurrent.duration.* +import 
java.util.concurrent.atomic.AtomicReference +import ox.* + +enum CircuitBreakerState: + case Open + case Closed + case HalfOpen + +enum CircuitBreakerResult: + case Success + case Failure + case Slow + +enum SlidingWindow: + case CountBased(windowSize: Int) + case TimeBased(duration: Duration) + + + +// TODO -- missing params maxWaitDurationInHalfOpenState +case class CircuitBreakerConfig( + failureRateThreshold: Int = 50, + slowCallThreshold: Int = 0, + slowCallDurationThreshold: Duration = 60.seconds, + slidingWindow: SlidingWindow = SlidingWindow.CountBased(100), + minimumNumberOfCalls: Int = 100, + numberOfCallsInHalfOpenState: Int +) + +case class AcquireResult(acquired: Boolean, circuitState: CircuitBreakerState) + +trait CircuitBreakerInterface: + def runOrDrop[T](op: => T): Option[T] + protected def tryAcquire: Boolean + def runBlocking[T](op: => T): T + def state: CircuitBreakerState + def resultPolicy[E, T]: ResultPolicy[E, T] + def reset: Unit +end CircuitBreakerInterface + +class CircuitBreaker( + private val config: CircuitBreakerConfig +)(using Ox) + extends CircuitBreakerInterface: + val stateMachine: CircuitBreakerStateMachine = CircuitBreakerStateMachine(config) + +private sealed trait CircuitBreakerStateMachine: + def getState: CircuitBreakerState + def registerResult(result: CircuitBreakerResult): Unit + def tryAcquire: AcquireResult + +private[resilience] object CircuitBreakerStateMachine: + def apply(config: CircuitBreakerConfig)(using Ox): CircuitBreakerStateMachine = + config.slidingWindow match + case SlidingWindow.CountBased(size) => + CircuitBreakerCountStateMachine( + size, + config.failureRateThreshold, + config.slowCallThreshold, + config.slowCallDurationThreshold, + config.minimumNumberOfCalls, + config.numberOfCallsInHalfOpenState + ) + case SlidingWindow.TimeBased(duration) => + CircuitBreakerTimeStateMachine( + config.failureRateThreshold, + config.slowCallThreshold, + config.slowCallDurationThreshold, + duration, + 
config.minimumNumberOfCalls, + config.numberOfCallsInHalfOpenState + ) + end apply + + private[resilience] case class CircuitBreakerCountStateMachine( + windowSize: Int, + failureRateThreshold: Int, + slowCallThreshold: Int, + slowCallDurationThreshold: Duration, + minimumNumberOfCalls: Int, + numberOfCallsInHalfOpenState: Int + )(using Ox) + extends CircuitBreakerStateMachine: + assert(failureRateThreshold >= 0 && failureRateThreshold <=1, s"failureRateThreshold must be between 0 and 100, value: $failureRateThreshold") + assert(slowCallThreshold >= 0 && slowCallThreshold <=1, s"slowCallThreshold must be between 0 and 100, value: $slowCallThreshold") + + private val state: AtomicReference[CircuitBreakerState] = AtomicReference(CircuitBreakerState.Closed) + private val callResults: AtomicCircularBuffer[CircuitBreakerResult] = AtomicCircularBuffer[CircuitBreakerResult](windowSize) + private val halfOpenTokens: TokenBucket = TokenBucket(numberOfCallsInHalfOpenState, Some(numberOfCallsInHalfOpenState)) + + def registerResult(result: CircuitBreakerResult, acquired: AcquireResult): Unit = + callResults.push(result) + state.set(nextState(callResults.snapshot)) + + // TODO - should result in info if it was acquired in halfOpen state, so we know if we should release token after operation + def tryAcquire: AcquireResult = getState match + case CircuitBreakerState.Open => AcquireResult(true, CircuitBreakerState.Open) + case CircuitBreakerState.Closed => AcquireResult(false, CircuitBreakerState.Closed) + case CircuitBreakerState.HalfOpen => AcquireResult(halfOpenTokens.tryAcquire(1), CircuitBreakerState.HalfOpen) + + private def nextState(results: Array[CircuitBreakerResult]): CircuitBreakerState = + val numOfOperations = results.size + val failuresRate = ((results.count(_ == CircuitBreakerResult.Failure) / windowSize.toFloat) * 100).toInt + val slowRate = ((results.count(_ == CircuitBreakerResult.Slow) / windowSize.toFloat) * 100).toInt + state.updateAndGet { current => + 
current match + case CircuitBreakerState.Open => + if numOfOperations >= minimumNumberOfCalls && (failuresRate < failureRateThreshold || slowRate < slowCallThreshold) then + // TODO - start time to go to HalfOpen + CircuitBreakerState.Closed + else CircuitBreakerState.Open + case CircuitBreakerState.Closed => + case CircuitBreakerState.HalfOpen => + + } + end nextState + + def getState: CircuitBreakerState = state.get() + end CircuitBreakerCountStateMachine + + private[resilience] case class CircuitBreakerTimeStateMachine( + failureRateThreshold: Int, + slowCallThreshold: Int, + slowCallDurationThreshold: Duration, + windowDuration: Duration, + minimumNumberOfCalls: Int, + numberOfCallsInHalfOpenState: Int + )(using Ox) + extends CircuitBreakerStateMachine +end CircuitBreakerStateMachine From 8447a0fc8c1b1900465d15556b9669cb8b815041 Mon Sep 17 00:00:00 2001 From: Kamil-Lontkowski <kamillont14@gmail.com> Date: Thu, 9 Jan 2025 15:18:28 +0100 Subject: [PATCH 02/26] CircuitBreakerCountStateMachine --- .../scala/ox/resilience/CircuitBreaker.scala | 141 ++++++++++++------ 1 file changed, 97 insertions(+), 44 deletions(-) diff --git a/core/src/main/scala/ox/resilience/CircuitBreaker.scala b/core/src/main/scala/ox/resilience/CircuitBreaker.scala index d278899a..3d572196 100644 --- a/core/src/main/scala/ox/resilience/CircuitBreaker.scala +++ b/core/src/main/scala/ox/resilience/CircuitBreaker.scala @@ -3,11 +3,15 @@ package ox.resilience import scala.concurrent.duration.* import java.util.concurrent.atomic.AtomicReference import ox.* +import java.util.concurrent.atomic.AtomicInteger +import java.util.concurrent.TimeUnit +import ox.scheduling.scheduled +import ox.scheduling.{ScheduledConfig, Schedule} enum CircuitBreakerState: - case Open + case Open(since: Long) case Closed - case HalfOpen + case HalfOpen(since: Long) enum CircuitBreakerResult: case Success @@ -16,40 +20,54 @@ enum CircuitBreakerResult: enum SlidingWindow: case CountBased(windowSize: Int) - case 
TimeBased(duration: Duration) - + case TimeBased(duration: FiniteDuration) - -// TODO -- missing params maxWaitDurationInHalfOpenState +// TODO -- missing params maxWaitDurationInHalfOpenState - timeout to complete enough operations in HalfOpen state, otherwise go back to open case class CircuitBreakerConfig( failureRateThreshold: Int = 50, slowCallThreshold: Int = 0, - slowCallDurationThreshold: Duration = 60.seconds, + slowCallDurationThreshold: FiniteDuration = 60.seconds, slidingWindow: SlidingWindow = SlidingWindow.CountBased(100), minimumNumberOfCalls: Int = 100, + waitDurationOpenState: FiniteDuration = FiniteDuration.apply(0, TimeUnit.MILLISECONDS), numberOfCallsInHalfOpenState: Int ) case class AcquireResult(acquired: Boolean, circuitState: CircuitBreakerState) -trait CircuitBreakerInterface: - def runOrDrop[T](op: => T): Option[T] - protected def tryAcquire: Boolean - def runBlocking[T](op: => T): T - def state: CircuitBreakerState - def resultPolicy[E, T]: ResultPolicy[E, T] - def reset: Unit -end CircuitBreakerInterface - -class CircuitBreaker( - private val config: CircuitBreakerConfig -)(using Ox) - extends CircuitBreakerInterface: - val stateMachine: CircuitBreakerStateMachine = CircuitBreakerStateMachine(config) +class CircuitBreaker(val config: CircuitBreakerConfig)(using Ox): + private val stateMachine: CircuitBreakerStateMachine = CircuitBreakerStateMachine(config) + private val slowCallDurationThreshold = config.slowCallDurationThreshold + + def state: CircuitBreakerState = stateMachine.getState + + def runOrDrop[E, F[_], T](em: ErrorMode[E, F])(resultPolicy: ResultPolicy[E, T] = ResultPolicy.default[E, T])(op: => F[T]): Option[F[T]] = + val acquiredResult = stateMachine.tryAcquire + if acquiredResult.acquired then + val before = System.nanoTime() + val result = op + val after = System.nanoTime() + val duration = (after - before).nanos + // Check result and results of policy + if em.isError(result) && 
resultPolicy.isWorthRetrying(em.getError(result)) then + stateMachine.registerResult(CircuitBreakerResult.Failure, acquiredResult) + Some(result) + else if resultPolicy.isSuccess(em.getT(result)) then + if duration > slowCallDurationThreshold then stateMachine.registerResult(CircuitBreakerResult.Slow, acquiredResult) + else stateMachine.registerResult(CircuitBreakerResult.Success, acquiredResult) + Some(result) + else + stateMachine.registerResult(CircuitBreakerResult.Failure, acquiredResult) + Some(result) + end if + else None + end if + end runOrDrop +end CircuitBreaker private sealed trait CircuitBreakerStateMachine: def getState: CircuitBreakerState - def registerResult(result: CircuitBreakerResult): Unit + def registerResult(result: CircuitBreakerResult, acquired: AcquireResult): Unit def tryAcquire: AcquireResult private[resilience] object CircuitBreakerStateMachine: @@ -62,7 +80,8 @@ private[resilience] object CircuitBreakerStateMachine: config.slowCallThreshold, config.slowCallDurationThreshold, config.minimumNumberOfCalls, - config.numberOfCallsInHalfOpenState + config.numberOfCallsInHalfOpenState, + config.waitDurationOpenState ) case SlidingWindow.TimeBased(duration) => CircuitBreakerTimeStateMachine( @@ -71,7 +90,8 @@ private[resilience] object CircuitBreakerStateMachine: config.slowCallDurationThreshold, duration, config.minimumNumberOfCalls, - config.numberOfCallsInHalfOpenState + config.numberOfCallsInHalfOpenState, + config.waitDurationOpenState ) end apply @@ -79,27 +99,34 @@ private[resilience] object CircuitBreakerStateMachine: windowSize: Int, failureRateThreshold: Int, slowCallThreshold: Int, - slowCallDurationThreshold: Duration, + slowCallDurationThreshold: FiniteDuration, minimumNumberOfCalls: Int, - numberOfCallsInHalfOpenState: Int + numberOfCallsInHalfOpenState: Int, + waitDurationOpenState: FiniteDuration )(using Ox) extends CircuitBreakerStateMachine: - assert(failureRateThreshold >= 0 && failureRateThreshold <=1, s"failureRateThreshold 
must be between 0 and 100, value: $failureRateThreshold") - assert(slowCallThreshold >= 0 && slowCallThreshold <=1, s"slowCallThreshold must be between 0 and 100, value: $slowCallThreshold") - + assert( + failureRateThreshold >= 0 && failureRateThreshold <= 1, + s"failureRateThreshold must be between 0 and 100, value: $failureRateThreshold" + ) + assert(slowCallThreshold >= 0 && slowCallThreshold <= 1, s"slowCallThreshold must be between 0 and 100, value: $slowCallThreshold") + private val state: AtomicReference[CircuitBreakerState] = AtomicReference(CircuitBreakerState.Closed) private val callResults: AtomicCircularBuffer[CircuitBreakerResult] = AtomicCircularBuffer[CircuitBreakerResult](windowSize) private val halfOpenTokens: TokenBucket = TokenBucket(numberOfCallsInHalfOpenState, Some(numberOfCallsInHalfOpenState)) + private val halfOpenNumOfCalls: AtomicInteger = AtomicInteger(0) def registerResult(result: CircuitBreakerResult, acquired: AcquireResult): Unit = callResults.push(result) + if acquired.acquired && acquired.circuitState == CircuitBreakerState.HalfOpen then + halfOpenTokens.release(1) + halfOpenNumOfCalls.incrementAndGet().discard state.set(nextState(callResults.snapshot)) - // TODO - should result in info if it was acquired in halfOpen state, so we know if we should release token after operation def tryAcquire: AcquireResult = getState match - case CircuitBreakerState.Open => AcquireResult(true, CircuitBreakerState.Open) - case CircuitBreakerState.Closed => AcquireResult(false, CircuitBreakerState.Closed) - case CircuitBreakerState.HalfOpen => AcquireResult(halfOpenTokens.tryAcquire(1), CircuitBreakerState.HalfOpen) + case CircuitBreakerState.Closed => AcquireResult(true, CircuitBreakerState.Closed) + case currState @ CircuitBreakerState.Open(since) => AcquireResult(false, currState) + case currState @ CircuitBreakerState.HalfOpen(since) => AcquireResult(halfOpenTokens.tryAcquire(1), currState) private def nextState(results: 
Array[CircuitBreakerResult]): CircuitBreakerState = val numOfOperations = results.size @@ -107,13 +134,34 @@ private[resilience] object CircuitBreakerStateMachine: val slowRate = ((results.count(_ == CircuitBreakerResult.Slow) / windowSize.toFloat) * 100).toInt state.updateAndGet { current => current match - case CircuitBreakerState.Open => - if numOfOperations >= minimumNumberOfCalls && (failuresRate < failureRateThreshold || slowRate < slowCallThreshold) then - // TODO - start time to go to HalfOpen - CircuitBreakerState.Closed - else CircuitBreakerState.Open - case CircuitBreakerState.Closed => - case CircuitBreakerState.HalfOpen => + // After operation check if we didn't cross thresholds + case CircuitBreakerState.Closed => + if numOfOperations >= minimumNumberOfCalls && (failuresRate >= failureRateThreshold || slowRate >= slowCallThreshold) then + // Start schedule to switch to HalfOpen after waitDurationOpenState passed + forkDiscard( + scheduled(ScheduledConfig[Throwable, Unit](Schedule.InitialDelay(waitDurationOpenState)))( + state.set(CircuitBreakerState.HalfOpen(System.currentTimeMillis())) + ) + ) + CircuitBreakerState.Open(System.currentTimeMillis()) + else CircuitBreakerState.Closed + case CircuitBreakerState.Open(since) => + val timePassed = (System.currentTimeMillis() - since) > slowCallDurationThreshold.toMillis + if timePassed then CircuitBreakerState.HalfOpen(System.currentTimeMillis()) + else CircuitBreakerState.Open(since) + case CircuitBreakerState.HalfOpen(since) => + // If halfOpen calls were completed && rates are below we open again + if halfOpenNumOfCalls.get() == numberOfCallsInHalfOpenState && + (failuresRate < failureRateThreshold || slowRate < slowCallThreshold) + then CircuitBreakerState.Open(System.currentTimeMillis()) + // If halfOpen calls completed, but rates are still above go back to open + else if halfOpenNumOfCalls.get() == numberOfCallsInHalfOpenState && + (failuresRate >= failureRateThreshold || slowRate >= 
slowCallThreshold) + then + halfOpenNumOfCalls.set(0) + CircuitBreakerState.Open(System.currentTimeMillis()) + // We didn't complete all half open calls, keep halfOpen + else CircuitBreakerState.HalfOpen(since) } end nextState @@ -124,10 +172,15 @@ private[resilience] object CircuitBreakerStateMachine: private[resilience] case class CircuitBreakerTimeStateMachine( failureRateThreshold: Int, slowCallThreshold: Int, - slowCallDurationThreshold: Duration, - windowDuration: Duration, + slowCallDurationThreshold: FiniteDuration, + windowDuration: FiniteDuration, minimumNumberOfCalls: Int, - numberOfCallsInHalfOpenState: Int + numberOfCallsInHalfOpenState: Int, + waitDurationOpenState: FiniteDuration )(using Ox) - extends CircuitBreakerStateMachine + extends CircuitBreakerStateMachine: + def getState: CircuitBreakerState = ??? + def registerResult(result: CircuitBreakerResult, acquired: AcquireResult): Unit = ??? + def tryAcquire: AcquireResult = ??? + end CircuitBreakerTimeStateMachine end CircuitBreakerStateMachine From 9951f2927d16abc064f8b73e905bd522f2cb5540 Mon Sep 17 00:00:00 2001 From: Kamil-Lontkowski <kamillont14@gmail.com> Date: Fri, 10 Jan 2025 21:30:06 +0100 Subject: [PATCH 03/26] WIP --- .../scala/ox/resilience/CircuitBreaker.scala | 206 ++++++++++-------- 1 file changed, 112 insertions(+), 94 deletions(-) diff --git a/core/src/main/scala/ox/resilience/CircuitBreaker.scala b/core/src/main/scala/ox/resilience/CircuitBreaker.scala index 3d572196..c0e19456 100644 --- a/core/src/main/scala/ox/resilience/CircuitBreaker.scala +++ b/core/src/main/scala/ox/resilience/CircuitBreaker.scala @@ -3,21 +3,29 @@ package ox.resilience import scala.concurrent.duration.* import java.util.concurrent.atomic.AtomicReference import ox.* -import java.util.concurrent.atomic.AtomicInteger import java.util.concurrent.TimeUnit import ox.scheduling.scheduled import ox.scheduling.{ScheduledConfig, Schedule} +import java.util.concurrent.Semaphore +import ox.channels.Actor +import 
ox.channels.BufferCapacity enum CircuitBreakerState: case Open(since: Long) case Closed - case HalfOpen(since: Long) + case HalfOpen(since: Long, semaphore: Semaphore, completedOperations: Int = 0) enum CircuitBreakerResult: case Success case Failure case Slow +case class Metrics( + failureRate: Int, + slowCallsRate: Int, + operationsInWindow: Int +) + enum SlidingWindow: case CountBased(windowSize: Int) case TimeBased(duration: FiniteDuration) @@ -29,20 +37,49 @@ case class CircuitBreakerConfig( slowCallDurationThreshold: FiniteDuration = 60.seconds, slidingWindow: SlidingWindow = SlidingWindow.CountBased(100), minimumNumberOfCalls: Int = 100, - waitDurationOpenState: FiniteDuration = FiniteDuration.apply(0, TimeUnit.MILLISECONDS), + waitDurationOpenState: FiniteDuration = FiniteDuration(0, TimeUnit.MILLISECONDS), + halfOpenTimeoutDuration: FiniteDuration = FiniteDuration(0, TimeUnit.MILLISECONDS), numberOfCallsInHalfOpenState: Int ) -case class AcquireResult(acquired: Boolean, circuitState: CircuitBreakerState) +private case class AcquireResult(acquired: Boolean, circuitState: CircuitBreakerState) + +private case class CircuitBreakerStateMachineConfig( + failureRateThreshold: Int, + slowCallThreshold: Int, + slowCallDurationThreshold: FiniteDuration, + minimumNumberOfCalls: Int, + numberOfCallsInHalfOpenState: Int, + waitDurationOpenState: FiniteDuration, + halfOpenTimeoutDuration: FiniteDuration, + state: AtomicReference[CircuitBreakerState] +) +private object CircuitBreakerStateMachineConfig: + def fromConfig(c: CircuitBreakerConfig, state: AtomicReference[CircuitBreakerState]): CircuitBreakerStateMachineConfig = + CircuitBreakerStateMachineConfig( + failureRateThreshold = c.failureRateThreshold, + slowCallThreshold = c.slowCallThreshold, + slowCallDurationThreshold = c.slowCallDurationThreshold, + minimumNumberOfCalls = c.minimumNumberOfCalls, + numberOfCallsInHalfOpenState = c.numberOfCallsInHalfOpenState, + waitDurationOpenState = c.waitDurationOpenState, + 
halfOpenTimeoutDuration = c.halfOpenTimeoutDuration, + state = state + ) +end CircuitBreakerStateMachineConfig class CircuitBreaker(val config: CircuitBreakerConfig)(using Ox): - private val stateMachine: CircuitBreakerStateMachine = CircuitBreakerStateMachine(config) - private val slowCallDurationThreshold = config.slowCallDurationThreshold + private val state = AtomicReference[CircuitBreakerState](CircuitBreakerState.Closed) + private val actorRef = Actor.create(CircuitBreakerStateMachine(config, state)) - def state: CircuitBreakerState = stateMachine.getState + private def tryAcquire: AcquireResult = state.get match + case CircuitBreakerState.Closed => AcquireResult(true, CircuitBreakerState.Closed) + case currState @ CircuitBreakerState.Open(since) => AcquireResult(false, currState) + case currState @ CircuitBreakerState.HalfOpen(_, semaphore, _) => AcquireResult(semaphore.tryAcquire(1), currState) + // TODO - register schedule for timeouts def runOrDrop[E, F[_], T](em: ErrorMode[E, F])(resultPolicy: ResultPolicy[E, T] = ResultPolicy.default[E, T])(op: => F[T]): Option[F[T]] = - val acquiredResult = stateMachine.tryAcquire + val acquiredResult = tryAcquire if acquiredResult.acquired then val before = System.nanoTime() val result = op @@ -50,14 +87,14 @@ class CircuitBreaker(val config: CircuitBreakerConfig)(using Ox): val duration = (after - before).nanos // Check result and results of policy if em.isError(result) && resultPolicy.isWorthRetrying(em.getError(result)) then - stateMachine.registerResult(CircuitBreakerResult.Failure, acquiredResult) + actorRef.tell(_.registerResult(CircuitBreakerResult.Failure, acquiredResult)) Some(result) else if resultPolicy.isSuccess(em.getT(result)) then - if duration > slowCallDurationThreshold then stateMachine.registerResult(CircuitBreakerResult.Slow, acquiredResult) - else stateMachine.registerResult(CircuitBreakerResult.Success, acquiredResult) + if duration > config.slowCallDurationThreshold then 
actorRef.tell(_.registerResult(CircuitBreakerResult.Slow, acquiredResult)) + else actorRef.tell(_.registerResult(CircuitBreakerResult.Success, acquiredResult)) Some(result) else - stateMachine.registerResult(CircuitBreakerResult.Failure, acquiredResult) + actorRef.tell(_.registerResult(CircuitBreakerResult.Failure, acquiredResult)) Some(result) end if else None @@ -66,121 +103,102 @@ class CircuitBreaker(val config: CircuitBreakerConfig)(using Ox): end CircuitBreaker private sealed trait CircuitBreakerStateMachine: - def getState: CircuitBreakerState def registerResult(result: CircuitBreakerResult, acquired: AcquireResult): Unit - def tryAcquire: AcquireResult private[resilience] object CircuitBreakerStateMachine: - def apply(config: CircuitBreakerConfig)(using Ox): CircuitBreakerStateMachine = + def apply(config: CircuitBreakerConfig, state: AtomicReference[CircuitBreakerState])(using Ox): CircuitBreakerStateMachine = config.slidingWindow match case SlidingWindow.CountBased(size) => CircuitBreakerCountStateMachine( - size, - config.failureRateThreshold, - config.slowCallThreshold, - config.slowCallDurationThreshold, - config.minimumNumberOfCalls, - config.numberOfCallsInHalfOpenState, - config.waitDurationOpenState + CircuitBreakerStateMachineConfig.fromConfig(config, state), + size ) case SlidingWindow.TimeBased(duration) => CircuitBreakerTimeStateMachine( - config.failureRateThreshold, - config.slowCallThreshold, - config.slowCallDurationThreshold, - duration, - config.minimumNumberOfCalls, - config.numberOfCallsInHalfOpenState, - config.waitDurationOpenState + CircuitBreakerStateMachineConfig.fromConfig(config, state), + duration ) end apply private[resilience] case class CircuitBreakerCountStateMachine( - windowSize: Int, - failureRateThreshold: Int, - slowCallThreshold: Int, - slowCallDurationThreshold: FiniteDuration, - minimumNumberOfCalls: Int, - numberOfCallsInHalfOpenState: Int, - waitDurationOpenState: FiniteDuration + config: 
CircuitBreakerStateMachineConfig, + windowSize: Int )(using Ox) extends CircuitBreakerStateMachine: assert( - failureRateThreshold >= 0 && failureRateThreshold <= 1, - s"failureRateThreshold must be between 0 and 100, value: $failureRateThreshold" + config.failureRateThreshold >= 0 && config.failureRateThreshold <= 1, + s"failureRateThreshold must be between 0 and 100, value: ${config.failureRateThreshold}" + ) + assert( + config.slowCallThreshold >= 0 && config.slowCallThreshold <= 1, + s"slowCallThreshold must be between 0 and 100, value: ${config.slowCallThreshold}" ) - assert(slowCallThreshold >= 0 && slowCallThreshold <= 1, s"slowCallThreshold must be between 0 and 100, value: $slowCallThreshold") - private val state: AtomicReference[CircuitBreakerState] = AtomicReference(CircuitBreakerState.Closed) private val callResults: AtomicCircularBuffer[CircuitBreakerResult] = AtomicCircularBuffer[CircuitBreakerResult](windowSize) - private val halfOpenTokens: TokenBucket = TokenBucket(numberOfCallsInHalfOpenState, Some(numberOfCallsInHalfOpenState)) - private val halfOpenNumOfCalls: AtomicInteger = AtomicInteger(0) def registerResult(result: CircuitBreakerResult, acquired: AcquireResult): Unit = callResults.push(result) - if acquired.acquired && acquired.circuitState == CircuitBreakerState.HalfOpen then - halfOpenTokens.release(1) - halfOpenNumOfCalls.incrementAndGet().discard - state.set(nextState(callResults.snapshot)) + // In case of result coming from halfOpen state we update num of completed operation in this state if it didn't change + if acquired.circuitState == CircuitBreakerState.HalfOpen then + config.state.updateAndGet { + case CircuitBreakerState.HalfOpen(since, semaphore, completedOperations) => + CircuitBreakerState.HalfOpen(since, semaphore, completedOperations + 1) + case state => state + }.discard - def tryAcquire: AcquireResult = getState match - case CircuitBreakerState.Closed => AcquireResult(true, CircuitBreakerState.Closed) - case currState @ 
CircuitBreakerState.Open(since) => AcquireResult(false, currState) - case currState @ CircuitBreakerState.HalfOpen(since) => AcquireResult(halfOpenTokens.tryAcquire(1), currState) + config.state.set(nextState) + end registerResult - private def nextState(results: Array[CircuitBreakerResult]): CircuitBreakerState = + def updateState(): Unit = + config.state.set(nextState) + + def callculateMetrics(results: Array[CircuitBreakerResult]): Metrics = val numOfOperations = results.size val failuresRate = ((results.count(_ == CircuitBreakerResult.Failure) / windowSize.toFloat) * 100).toInt val slowRate = ((results.count(_ == CircuitBreakerResult.Slow) / windowSize.toFloat) * 100).toInt - state.updateAndGet { current => - current match - // After operation check if we didn't cross thresholds - case CircuitBreakerState.Closed => - if numOfOperations >= minimumNumberOfCalls && (failuresRate >= failureRateThreshold || slowRate >= slowCallThreshold) then - // Start schedule to switch to HalfOpen after waitDurationOpenState passed - forkDiscard( - scheduled(ScheduledConfig[Throwable, Unit](Schedule.InitialDelay(waitDurationOpenState)))( - state.set(CircuitBreakerState.HalfOpen(System.currentTimeMillis())) - ) - ) - CircuitBreakerState.Open(System.currentTimeMillis()) - else CircuitBreakerState.Closed - case CircuitBreakerState.Open(since) => - val timePassed = (System.currentTimeMillis() - since) > slowCallDurationThreshold.toMillis - if timePassed then CircuitBreakerState.HalfOpen(System.currentTimeMillis()) - else CircuitBreakerState.Open(since) - case CircuitBreakerState.HalfOpen(since) => - // If halfOpen calls were completed && rates are below we open again - if halfOpenNumOfCalls.get() == numberOfCallsInHalfOpenState && - (failuresRate < failureRateThreshold || slowRate < slowCallThreshold) - then CircuitBreakerState.Open(System.currentTimeMillis()) - // If halfOpen calls completed, but rates are still above go back to open - else if halfOpenNumOfCalls.get() == 
numberOfCallsInHalfOpenState && - (failuresRate >= failureRateThreshold || slowRate >= slowCallThreshold) - then - halfOpenNumOfCalls.set(0) - CircuitBreakerState.Open(System.currentTimeMillis()) - // We didn't complete all half open calls, keep halfOpen - else CircuitBreakerState.HalfOpen(since) - - } + Metrics( + failuresRate, + slowRate, + numOfOperations + ) + end callculateMetrics + + private def nextState: CircuitBreakerState = + val metrics = callculateMetrics(callResults.snapshot) + config.state.get match + case CircuitBreakerState.Closed => + if metrics.operationsInWindow >= config.minimumNumberOfCalls && (metrics.failureRate >= config.failureRateThreshold || metrics.slowCallsRate >= config.slowCallThreshold) + then CircuitBreakerState.Open(System.currentTimeMillis()) + else CircuitBreakerState.Closed + case CircuitBreakerState.Open(since) => + val timePassed = (System.currentTimeMillis() - since) > config.slowCallDurationThreshold.toMillis + if timePassed then CircuitBreakerState.HalfOpen(System.currentTimeMillis(), Semaphore(config.numberOfCallsInHalfOpenState)) + else CircuitBreakerState.Open(since) + case CircuitBreakerState.HalfOpen(since, semaphore, completedCalls) => + lazy val timePassed = (System.currentTimeMillis() - since) > config.slowCallDurationThreshold.toMillis + // If halfOpen calls were completed && rates are below we open again + if completedCalls == config.numberOfCallsInHalfOpenState && + (metrics.failureRate < config.failureRateThreshold || metrics.slowCallsRate < config.slowCallThreshold) + then CircuitBreakerState.Open(System.currentTimeMillis()) + // If halfOpen calls completed, but rates are still above go back to open + else if completedCalls == config.numberOfCallsInHalfOpenState && + (metrics.failureRate >= config.failureRateThreshold || metrics.slowCallsRate >= config.slowCallThreshold) + then CircuitBreakerState.Open(System.currentTimeMillis()) + // if we didn't complete all half open calls but timeout is reached go back to 
open + else if completedCalls != config.numberOfCallsInHalfOpenState && config.halfOpenTimeoutDuration.toMillis != 0 && timePassed then + CircuitBreakerState.Open(System.currentTimeMillis()) + // We didn't complete all half open calls, keep halfOpen + else CircuitBreakerState.HalfOpen(since, semaphore) + end if + end match end nextState - - def getState: CircuitBreakerState = state.get() end CircuitBreakerCountStateMachine private[resilience] case class CircuitBreakerTimeStateMachine( - failureRateThreshold: Int, - slowCallThreshold: Int, - slowCallDurationThreshold: FiniteDuration, - windowDuration: FiniteDuration, - minimumNumberOfCalls: Int, - numberOfCallsInHalfOpenState: Int, - waitDurationOpenState: FiniteDuration + config: CircuitBreakerStateMachineConfig, + windowDuration: FiniteDuration )(using Ox) extends CircuitBreakerStateMachine: - def getState: CircuitBreakerState = ??? def registerResult(result: CircuitBreakerResult, acquired: AcquireResult): Unit = ??? - def tryAcquire: AcquireResult = ??? 
end CircuitBreakerTimeStateMachine end CircuitBreakerStateMachine From ed9985c8bc1b468fd2f84cd693a85578b952383c Mon Sep 17 00:00:00 2001 From: Kamil-Lontkowski <kamillont14@gmail.com> Date: Mon, 13 Jan 2025 17:23:57 +0100 Subject: [PATCH 04/26] breaker based on actor --- .../scala/ox/resilience/CircuitBreaker.scala | 89 +++++++++++++------ .../ox/resilience/CircuitBreakerTest.scala | 59 ++++++++++++ 2 files changed, 123 insertions(+), 25 deletions(-) create mode 100644 core/src/test/scala/ox/resilience/CircuitBreakerTest.scala diff --git a/core/src/main/scala/ox/resilience/CircuitBreaker.scala b/core/src/main/scala/ox/resilience/CircuitBreaker.scala index c0e19456..8326e212 100644 --- a/core/src/main/scala/ox/resilience/CircuitBreaker.scala +++ b/core/src/main/scala/ox/resilience/CircuitBreaker.scala @@ -9,6 +9,7 @@ import ox.scheduling.{ScheduledConfig, Schedule} import java.util.concurrent.Semaphore import ox.channels.Actor import ox.channels.BufferCapacity +import ox.channels.ActorRef enum CircuitBreakerState: case Open(since: Long) @@ -39,7 +40,7 @@ case class CircuitBreakerConfig( minimumNumberOfCalls: Int = 100, waitDurationOpenState: FiniteDuration = FiniteDuration(0, TimeUnit.MILLISECONDS), halfOpenTimeoutDuration: FiniteDuration = FiniteDuration(0, TimeUnit.MILLISECONDS), - numberOfCallsInHalfOpenState: Int + numberOfCallsInHalfOpenState: Int = 10 ) private case class AcquireResult(acquired: Boolean, circuitState: CircuitBreakerState) @@ -70,15 +71,15 @@ end CircuitBreakerStateMachineConfig class CircuitBreaker(val config: CircuitBreakerConfig)(using Ox): private val state = AtomicReference[CircuitBreakerState](CircuitBreakerState.Closed) - private val actorRef = Actor.create(CircuitBreakerStateMachine(config, state)) + private val actorRef: ActorRef[CircuitBreakerStateMachine] = Actor.create(CircuitBreakerStateMachine(config, state)) private def tryAcquire: AcquireResult = state.get match case CircuitBreakerState.Closed => AcquireResult(true, 
CircuitBreakerState.Closed) - case currState @ CircuitBreakerState.Open(since) => AcquireResult(false, currState) + case currState @ CircuitBreakerState.Open(_) => AcquireResult(false, currState) case currState @ CircuitBreakerState.HalfOpen(_, semaphore, _) => AcquireResult(semaphore.tryAcquire(1), currState) // TODO - register schedule for timeouts - def runOrDrop[E, F[_], T](em: ErrorMode[E, F])(resultPolicy: ResultPolicy[E, T] = ResultPolicy.default[E, T])(op: => F[T]): Option[F[T]] = + def runOrDrop[E, F[_], T](em: ErrorMode[E, F], resultPolicy: ResultPolicy[E, T] = ResultPolicy.default[E, T])(op: => F[T]): Option[F[T]] = val acquiredResult = tryAcquire if acquiredResult.acquired then val before = System.nanoTime() @@ -100,13 +101,44 @@ class CircuitBreaker(val config: CircuitBreakerConfig)(using Ox): else None end if end runOrDrop + + def runEitherOrDrop[E, T](resultPolicy: ResultPolicy[E, T] = ResultPolicy.default[E, T])( + op: => Either[E, T] + ): Option[Either[E, T]] = + val em = EitherMode[E] + val acquiredResult = tryAcquire + if acquiredResult.acquired then + val before = System.nanoTime() + val result = op + val after = System.nanoTime() + val duration = (after - before).nanos + // Check result and results of policy + if em.isError(result) && resultPolicy.isWorthRetrying(em.getError(result)) then + actorRef.tell(_.registerResult(CircuitBreakerResult.Failure, acquiredResult)) + Some(result) + else if resultPolicy.isSuccess(em.getT(result)) then + if duration > config.slowCallDurationThreshold then actorRef.tell(_.registerResult(CircuitBreakerResult.Slow, acquiredResult)) + else actorRef.tell(_.registerResult(CircuitBreakerResult.Success, acquiredResult)) + Some(result) + else + actorRef.tell(_.registerResult(CircuitBreakerResult.Failure, acquiredResult)) + Some(result) + end if + else None + end if + end runEitherOrDrop end CircuitBreaker private sealed trait CircuitBreakerStateMachine: def registerResult(result: CircuitBreakerResult, acquired: 
AcquireResult): Unit + def updateState(): Unit + def calculateMetrics(results: Array[CircuitBreakerResult]): Metrics +// def selfRef: ActorRef[CircuitBreakerStateMachine] private[resilience] object CircuitBreakerStateMachine: - def apply(config: CircuitBreakerConfig, state: AtomicReference[CircuitBreakerState])(using Ox): CircuitBreakerStateMachine = + def apply(config: CircuitBreakerConfig, state: AtomicReference[CircuitBreakerState])(using + Ox + ): CircuitBreakerStateMachine = config.slidingWindow match case SlidingWindow.CountBased(size) => CircuitBreakerCountStateMachine( @@ -126,11 +158,11 @@ private[resilience] object CircuitBreakerStateMachine: )(using Ox) extends CircuitBreakerStateMachine: assert( - config.failureRateThreshold >= 0 && config.failureRateThreshold <= 1, + config.failureRateThreshold >= 0 && config.failureRateThreshold <= 100, s"failureRateThreshold must be between 0 and 100, value: ${config.failureRateThreshold}" ) assert( - config.slowCallThreshold >= 0 && config.slowCallThreshold <= 1, + config.slowCallThreshold >= 0 && config.slowCallThreshold <= 100, s"slowCallThreshold must be between 0 and 100, value: ${config.slowCallThreshold}" ) @@ -152,8 +184,8 @@ private[resilience] object CircuitBreakerStateMachine: def updateState(): Unit = config.state.set(nextState) - def callculateMetrics(results: Array[CircuitBreakerResult]): Metrics = - val numOfOperations = results.size + def calculateMetrics(results: Array[CircuitBreakerResult]): Metrics = + val numOfOperations = results.length val failuresRate = ((results.count(_ == CircuitBreakerResult.Failure) / windowSize.toFloat) * 100).toInt val slowRate = ((results.count(_ == CircuitBreakerResult.Slow) / windowSize.toFloat) * 100).toInt Metrics( @@ -161,34 +193,39 @@ private[resilience] object CircuitBreakerStateMachine: slowRate, numOfOperations ) - end callculateMetrics + end calculateMetrics private def nextState: CircuitBreakerState = - val metrics = callculateMetrics(callResults.snapshot) + 
val metrics = calculateMetrics(callResults.snapshot) + val exceededThreshold = (metrics.failureRate >= config.failureRateThreshold || metrics.slowCallsRate >= config.slowCallThreshold) + val minCallsRecorder = metrics.operationsInWindow >= config.minimumNumberOfCalls config.state.get match case CircuitBreakerState.Closed => - if metrics.operationsInWindow >= config.minimumNumberOfCalls && (metrics.failureRate >= config.failureRateThreshold || metrics.slowCallsRate >= config.slowCallThreshold) - then CircuitBreakerState.Open(System.currentTimeMillis()) + if minCallsRecorder && exceededThreshold then + // schedule switch to halfOpen after timeout +// forkDiscard: +// scheduled(ScheduledConfig(Schedule.InitialDelay(config.waitDurationOpenState)))(selfRef.tell(_.updateState())) + CircuitBreakerState.Open(System.currentTimeMillis()) else CircuitBreakerState.Closed case CircuitBreakerState.Open(since) => - val timePassed = (System.currentTimeMillis() - since) > config.slowCallDurationThreshold.toMillis + val timePassed = (System.currentTimeMillis() - since) > config.waitDurationOpenState.toMillis if timePassed then CircuitBreakerState.HalfOpen(System.currentTimeMillis(), Semaphore(config.numberOfCallsInHalfOpenState)) else CircuitBreakerState.Open(since) case CircuitBreakerState.HalfOpen(since, semaphore, completedCalls) => - lazy val timePassed = (System.currentTimeMillis() - since) > config.slowCallDurationThreshold.toMillis - // If halfOpen calls were completed && rates are below we open again - if completedCalls == config.numberOfCallsInHalfOpenState && - (metrics.failureRate < config.failureRateThreshold || metrics.slowCallsRate < config.slowCallThreshold) - then CircuitBreakerState.Open(System.currentTimeMillis()) - // If halfOpen calls completed, but rates are still above go back to open - else if completedCalls == config.numberOfCallsInHalfOpenState && - (metrics.failureRate >= config.failureRateThreshold || metrics.slowCallsRate >= config.slowCallThreshold) - 
then CircuitBreakerState.Open(System.currentTimeMillis()) + lazy val timePassed = (System.currentTimeMillis() - since) > config.halfOpenTimeoutDuration.toMillis // if we didn't complete all half open calls but timeout is reached go back to open - else if completedCalls != config.numberOfCallsInHalfOpenState && config.halfOpenTimeoutDuration.toMillis != 0 && timePassed then + if !minCallsRecorder && config.halfOpenTimeoutDuration.toMillis != 0 && timePassed then + // schedule timeout for halfOpen state +// forkDiscard: +// scheduled(ScheduledConfig(Schedule.InitialDelay(config.halfOpenTimeoutDuration)))(selfRef.tell(_.updateState())) CircuitBreakerState.Open(System.currentTimeMillis()) + // If halfOpen calls were completed && rates are below we close breaker + else if minCallsRecorder && !exceededThreshold then CircuitBreakerState.Open(System.currentTimeMillis()) + // If halfOpen calls completed, but rates are still above go back to open + else if minCallsRecorder && exceededThreshold + then CircuitBreakerState.Open(System.currentTimeMillis()) // We didn't complete all half open calls, keep halfOpen - else CircuitBreakerState.HalfOpen(since, semaphore) + else CircuitBreakerState.HalfOpen(since, semaphore, completedCalls) end if end match end nextState @@ -200,5 +237,7 @@ private[resilience] object CircuitBreakerStateMachine: )(using Ox) extends CircuitBreakerStateMachine: def registerResult(result: CircuitBreakerResult, acquired: AcquireResult): Unit = ??? + def updateState(): Unit = ??? + def calculateMetrics(results: Array[CircuitBreakerResult]): Metrics = ??? 
end CircuitBreakerTimeStateMachine end CircuitBreakerStateMachine diff --git a/core/src/test/scala/ox/resilience/CircuitBreakerTest.scala b/core/src/test/scala/ox/resilience/CircuitBreakerTest.scala new file mode 100644 index 00000000..468a6cff --- /dev/null +++ b/core/src/test/scala/ox/resilience/CircuitBreakerTest.scala @@ -0,0 +1,59 @@ +package ox.resilience + +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers +import scala.concurrent.duration.* +import ox.* + +class CircuitBreakerTest extends AnyFlatSpec with Matchers: + behavior of "Circuit Breaker" + + it should "drop operation after exceeding threshold" in supervised { + // given + val thresholdRate = 100 + val numberOfOperations = 1 + val circuitBreaker = CircuitBreaker( + CircuitBreakerConfig( + failureRateThreshold = thresholdRate, + minimumNumberOfCalls = numberOfOperations, + slidingWindow = SlidingWindow.CountBased(numberOfOperations) + ) + ) + + def f(): Either[String, String] = + Left("boom") + // when + val result1 = circuitBreaker.runEitherOrDrop(ResultPolicy.default)(f()) + sleep(100.millis) // wait for state to register + val result2 = circuitBreaker.runEitherOrDrop(ResultPolicy.default)(f()) + + // then + result1 shouldBe defined + result2 shouldBe empty + } + + it should "run" in supervised { + // given + val thresholdRate = 100 + val numberOfOperations = 10 + val circuitBreaker = CircuitBreaker( + CircuitBreakerConfig( + failureRateThreshold = thresholdRate, + minimumNumberOfCalls = numberOfOperations, + slidingWindow = SlidingWindow.CountBased(numberOfOperations) + ) + ) + var counter = 0 + def f(): Either[String, String] = + counter += 1 + Left("boom") + + // when + 0 to 50 foreach: _ => + circuitBreaker.runEitherOrDrop(ResultPolicy.default)(f()) + + // then + println(counter) + } + +end CircuitBreakerTest From 11d800515253a2e647f37b4b5f60505a057825f0 Mon Sep 17 00:00:00 2001 From: Kamil-Lontkowski <kamillont14@gmail.com> Date: Tue, 14 Jan 2025 15:21:50 
+0100 Subject: [PATCH 05/26] Don't use atomics inside state machine --- .../scala/ox/resilience/CircuitBreaker.scala | 54 +++++++++++-------- 1 file changed, 31 insertions(+), 23 deletions(-) diff --git a/core/src/main/scala/ox/resilience/CircuitBreaker.scala b/core/src/main/scala/ox/resilience/CircuitBreaker.scala index 8326e212..c7376124 100644 --- a/core/src/main/scala/ox/resilience/CircuitBreaker.scala +++ b/core/src/main/scala/ox/resilience/CircuitBreaker.scala @@ -10,6 +10,7 @@ import java.util.concurrent.Semaphore import ox.channels.Actor import ox.channels.BufferCapacity import ox.channels.ActorRef +import ox.channels.Channel.withCapacity enum CircuitBreakerState: case Open(since: Long) @@ -38,7 +39,7 @@ case class CircuitBreakerConfig( slowCallDurationThreshold: FiniteDuration = 60.seconds, slidingWindow: SlidingWindow = SlidingWindow.CountBased(100), minimumNumberOfCalls: Int = 100, - waitDurationOpenState: FiniteDuration = FiniteDuration(0, TimeUnit.MILLISECONDS), + waitDurationOpenState: FiniteDuration = FiniteDuration(10, TimeUnit.SECONDS), halfOpenTimeoutDuration: FiniteDuration = FiniteDuration(0, TimeUnit.MILLISECONDS), numberOfCallsInHalfOpenState: Int = 10 ) @@ -52,11 +53,10 @@ private case class CircuitBreakerStateMachineConfig( minimumNumberOfCalls: Int, numberOfCallsInHalfOpenState: Int, waitDurationOpenState: FiniteDuration, - halfOpenTimeoutDuration: FiniteDuration, - state: AtomicReference[CircuitBreakerState] + halfOpenTimeoutDuration: FiniteDuration ) private object CircuitBreakerStateMachineConfig: - def fromConfig(c: CircuitBreakerConfig, state: AtomicReference[CircuitBreakerState]): CircuitBreakerStateMachineConfig = + def fromConfig(c: CircuitBreakerConfig): CircuitBreakerStateMachineConfig = CircuitBreakerStateMachineConfig( failureRateThreshold = c.failureRateThreshold, slowCallThreshold = c.slowCallThreshold, @@ -64,16 +64,15 @@ private object CircuitBreakerStateMachineConfig: minimumNumberOfCalls = c.minimumNumberOfCalls, 
numberOfCallsInHalfOpenState = c.numberOfCallsInHalfOpenState, waitDurationOpenState = c.waitDurationOpenState, - halfOpenTimeoutDuration = c.halfOpenTimeoutDuration, - state = state + halfOpenTimeoutDuration = c.halfOpenTimeoutDuration ) end CircuitBreakerStateMachineConfig class CircuitBreaker(val config: CircuitBreakerConfig)(using Ox): - private val state = AtomicReference[CircuitBreakerState](CircuitBreakerState.Closed) - private val actorRef: ActorRef[CircuitBreakerStateMachine] = Actor.create(CircuitBreakerStateMachine(config, state)) + val stateMachine = CircuitBreakerStateMachine(config) + private val actorRef: ActorRef[CircuitBreakerStateMachine] = Actor.create(stateMachine)(using sc = BufferCapacity.apply(100)) - private def tryAcquire: AcquireResult = state.get match + private def tryAcquire: AcquireResult = stateMachine.state match case CircuitBreakerState.Closed => AcquireResult(true, CircuitBreakerState.Closed) case currState @ CircuitBreakerState.Open(_) => AcquireResult(false, currState) case currState @ CircuitBreakerState.HalfOpen(_, semaphore, _) => AcquireResult(semaphore.tryAcquire(1), currState) @@ -132,22 +131,23 @@ end CircuitBreaker private sealed trait CircuitBreakerStateMachine: def registerResult(result: CircuitBreakerResult, acquired: AcquireResult): Unit def updateState(): Unit - def calculateMetrics(results: Array[CircuitBreakerResult]): Metrics + def calculateMetrics: Metrics + def state: CircuitBreakerState // def selfRef: ActorRef[CircuitBreakerStateMachine] private[resilience] object CircuitBreakerStateMachine: - def apply(config: CircuitBreakerConfig, state: AtomicReference[CircuitBreakerState])(using + def apply(config: CircuitBreakerConfig)(using Ox ): CircuitBreakerStateMachine = config.slidingWindow match case SlidingWindow.CountBased(size) => CircuitBreakerCountStateMachine( - CircuitBreakerStateMachineConfig.fromConfig(config, state), + CircuitBreakerStateMachineConfig.fromConfig(config), size ) case 
SlidingWindow.TimeBased(duration) => CircuitBreakerTimeStateMachine( - CircuitBreakerStateMachineConfig.fromConfig(config, state), + CircuitBreakerStateMachineConfig.fromConfig(config), duration ) end apply @@ -166,25 +166,32 @@ private[resilience] object CircuitBreakerStateMachine: s"slowCallThreshold must be between 0 and 100, value: ${config.slowCallThreshold}" ) - private val callResults: AtomicCircularBuffer[CircuitBreakerResult] = AtomicCircularBuffer[CircuitBreakerResult](windowSize) + private val callResults: Array[Option[CircuitBreakerResult]] = Array.fill[Option[CircuitBreakerResult]](windowSize)(None) + private var writeIndex = 0 + + private var _state: CircuitBreakerState = CircuitBreakerState.Closed + + def state: CircuitBreakerState = _state def registerResult(result: CircuitBreakerResult, acquired: AcquireResult): Unit = - callResults.push(result) + callResults(writeIndex) = Some(result) + writeIndex = (writeIndex + 1) % windowSize // In case of result coming from halfOpen state we update num of completed operation in this state if it didn't change if acquired.circuitState == CircuitBreakerState.HalfOpen then - config.state.updateAndGet { + val newState = _state match case CircuitBreakerState.HalfOpen(since, semaphore, completedOperations) => CircuitBreakerState.HalfOpen(since, semaphore, completedOperations + 1) case state => state - }.discard + _state = newState - config.state.set(nextState) + _state = nextState end registerResult def updateState(): Unit = - config.state.set(nextState) + _state = nextState - def calculateMetrics(results: Array[CircuitBreakerResult]): Metrics = + def calculateMetrics: Metrics = + val results = callResults.flatMap(identity) val numOfOperations = results.length val failuresRate = ((results.count(_ == CircuitBreakerResult.Failure) / windowSize.toFloat) * 100).toInt val slowRate = ((results.count(_ == CircuitBreakerResult.Slow) / windowSize.toFloat) * 100).toInt @@ -196,10 +203,10 @@ private[resilience] object 
CircuitBreakerStateMachine: end calculateMetrics private def nextState: CircuitBreakerState = - val metrics = calculateMetrics(callResults.snapshot) + val metrics = calculateMetrics val exceededThreshold = (metrics.failureRate >= config.failureRateThreshold || metrics.slowCallsRate >= config.slowCallThreshold) val minCallsRecorder = metrics.operationsInWindow >= config.minimumNumberOfCalls - config.state.get match + _state match case CircuitBreakerState.Closed => if minCallsRecorder && exceededThreshold then // schedule switch to halfOpen after timeout @@ -238,6 +245,7 @@ private[resilience] object CircuitBreakerStateMachine: extends CircuitBreakerStateMachine: def registerResult(result: CircuitBreakerResult, acquired: AcquireResult): Unit = ??? def updateState(): Unit = ??? - def calculateMetrics(results: Array[CircuitBreakerResult]): Metrics = ??? + def calculateMetrics: Metrics = ??? + def state: CircuitBreakerState = ??? end CircuitBreakerTimeStateMachine end CircuitBreakerStateMachine From 72ab369c63604ed0fcdd8d80b2d9963a10af1e7a Mon Sep 17 00:00:00 2001 From: Kamil-Lontkowski <kamillont14@gmail.com> Date: Tue, 14 Jan 2025 15:45:09 +0100 Subject: [PATCH 06/26] Delete out of date TODOs --- core/src/main/scala/ox/resilience/CircuitBreaker.scala | 4 ---- 1 file changed, 4 deletions(-) diff --git a/core/src/main/scala/ox/resilience/CircuitBreaker.scala b/core/src/main/scala/ox/resilience/CircuitBreaker.scala index c7376124..5f432b0f 100644 --- a/core/src/main/scala/ox/resilience/CircuitBreaker.scala +++ b/core/src/main/scala/ox/resilience/CircuitBreaker.scala @@ -1,7 +1,6 @@ package ox.resilience import scala.concurrent.duration.* -import java.util.concurrent.atomic.AtomicReference import ox.* import java.util.concurrent.TimeUnit import ox.scheduling.scheduled @@ -10,7 +9,6 @@ import java.util.concurrent.Semaphore import ox.channels.Actor import ox.channels.BufferCapacity import ox.channels.ActorRef -import ox.channels.Channel.withCapacity enum 
CircuitBreakerState: case Open(since: Long) @@ -32,7 +30,6 @@ enum SlidingWindow: case CountBased(windowSize: Int) case TimeBased(duration: FiniteDuration) -// TODO -- missing params maxWaitDurationInHalfOpenState - timeout to complete enough operations in HalfOpen state, otherwise go back to open case class CircuitBreakerConfig( failureRateThreshold: Int = 50, slowCallThreshold: Int = 0, @@ -77,7 +74,6 @@ class CircuitBreaker(val config: CircuitBreakerConfig)(using Ox): case currState @ CircuitBreakerState.Open(_) => AcquireResult(false, currState) case currState @ CircuitBreakerState.HalfOpen(_, semaphore, _) => AcquireResult(semaphore.tryAcquire(1), currState) - // TODO - register schedule for timeouts def runOrDrop[E, F[_], T](em: ErrorMode[E, F], resultPolicy: ResultPolicy[E, T] = ResultPolicy.default[E, T])(op: => F[T]): Option[F[T]] = val acquiredResult = tryAcquire if acquiredResult.acquired then From 81ee70e7cd35c5ee5cbbc5cb8c6515eda47062bb Mon Sep 17 00:00:00 2001 From: Kamil-Lontkowski <kamillont14@gmail.com> Date: Wed, 15 Jan 2025 19:58:10 +0100 Subject: [PATCH 07/26] Refactor nextState to be pure --- .../scala/ox/resilience/CircuitBreaker.scala | 225 +++++++++--------- .../ox/resilience/CircuitBreakerTest.scala | 6 +- 2 files changed, 116 insertions(+), 115 deletions(-) diff --git a/core/src/main/scala/ox/resilience/CircuitBreaker.scala b/core/src/main/scala/ox/resilience/CircuitBreaker.scala index 5f432b0f..9d0c8e64 100644 --- a/core/src/main/scala/ox/resilience/CircuitBreaker.scala +++ b/core/src/main/scala/ox/resilience/CircuitBreaker.scala @@ -9,6 +9,7 @@ import java.util.concurrent.Semaphore import ox.channels.Actor import ox.channels.BufferCapacity import ox.channels.ActorRef +import scala.util.Try enum CircuitBreakerState: case Open(since: Long) @@ -23,7 +24,8 @@ enum CircuitBreakerResult: case class Metrics( failureRate: Int, slowCallsRate: Int, - operationsInWindow: Int + operationsInWindow: Int, + lastAcquisitionResult: Option[AcquireResult] 
) enum SlidingWindow: @@ -39,7 +41,16 @@ case class CircuitBreakerConfig( waitDurationOpenState: FiniteDuration = FiniteDuration(10, TimeUnit.SECONDS), halfOpenTimeoutDuration: FiniteDuration = FiniteDuration(0, TimeUnit.MILLISECONDS), numberOfCallsInHalfOpenState: Int = 10 -) +): + assert( + failureRateThreshold >= 0 && failureRateThreshold <= 100, + s"failureRateThreshold must be between 0 and 100, value: $failureRateThreshold" + ) + assert( + slowCallThreshold >= 0 && slowCallThreshold <= 100, + s"slowCallThreshold must be between 0 and 100, value: $slowCallThreshold" + ) +end CircuitBreakerConfig private case class AcquireResult(acquired: Boolean, circuitState: CircuitBreakerState) @@ -74,62 +85,113 @@ class CircuitBreaker(val config: CircuitBreakerConfig)(using Ox): case currState @ CircuitBreakerState.Open(_) => AcquireResult(false, currState) case currState @ CircuitBreakerState.HalfOpen(_, semaphore, _) => AcquireResult(semaphore.tryAcquire(1), currState) - def runOrDrop[E, F[_], T](em: ErrorMode[E, F], resultPolicy: ResultPolicy[E, T] = ResultPolicy.default[E, T])(op: => F[T]): Option[F[T]] = + def runOrDropWithErrorMode[E, F[_], T](em: ErrorMode[E, F], resultPolicy: ResultPolicy[E, T] = ResultPolicy.default[E, T])( + operation: => F[T] + ): Option[F[T]] = val acquiredResult = tryAcquire if acquiredResult.acquired then val before = System.nanoTime() - val result = op + val result = operation val after = System.nanoTime() val duration = (after - before).nanos // Check result and results of policy if em.isError(result) && resultPolicy.isWorthRetrying(em.getError(result)) then - actorRef.tell(_.registerResult(CircuitBreakerResult.Failure, acquiredResult)) + actorRef.tell(_.registerResult(CircuitBreakerResult.Failure, acquiredResult, actorRef)) Some(result) else if resultPolicy.isSuccess(em.getT(result)) then - if duration > config.slowCallDurationThreshold then actorRef.tell(_.registerResult(CircuitBreakerResult.Slow, acquiredResult)) - else 
actorRef.tell(_.registerResult(CircuitBreakerResult.Success, acquiredResult)) + if duration > config.slowCallDurationThreshold then + actorRef.tell(_.registerResult(CircuitBreakerResult.Slow, acquiredResult, actorRef)) + else actorRef.tell(_.registerResult(CircuitBreakerResult.Success, acquiredResult, actorRef)) Some(result) else - actorRef.tell(_.registerResult(CircuitBreakerResult.Failure, acquiredResult)) + actorRef.tell(_.registerResult(CircuitBreakerResult.Failure, acquiredResult, actorRef)) Some(result) end if else None end if - end runOrDrop + end runOrDropWithErrorMode - def runEitherOrDrop[E, T](resultPolicy: ResultPolicy[E, T] = ResultPolicy.default[E, T])( - op: => Either[E, T] + def runOrDropEither[E, T](resultPolicy: ResultPolicy[E, T] = ResultPolicy.default[E, T])( + operation: => Either[E, T] ): Option[Either[E, T]] = - val em = EitherMode[E] - val acquiredResult = tryAcquire - if acquiredResult.acquired then - val before = System.nanoTime() - val result = op - val after = System.nanoTime() - val duration = (after - before).nanos - // Check result and results of policy - if em.isError(result) && resultPolicy.isWorthRetrying(em.getError(result)) then - actorRef.tell(_.registerResult(CircuitBreakerResult.Failure, acquiredResult)) - Some(result) - else if resultPolicy.isSuccess(em.getT(result)) then - if duration > config.slowCallDurationThreshold then actorRef.tell(_.registerResult(CircuitBreakerResult.Slow, acquiredResult)) - else actorRef.tell(_.registerResult(CircuitBreakerResult.Success, acquiredResult)) - Some(result) - else - actorRef.tell(_.registerResult(CircuitBreakerResult.Failure, acquiredResult)) - Some(result) - end if - else None - end if - end runEitherOrDrop + runOrDropWithErrorMode(EitherMode[E], resultPolicy)(operation) + + def runOrDrop[T](resultPolicy: ResultPolicy[Throwable, T] = ResultPolicy.default[Throwable, T])(operation: => T): Option[T] = + runOrDropEither(resultPolicy)(Try(operation).toEither).map(_.fold(throw _, identity)) 
end CircuitBreaker -private sealed trait CircuitBreakerStateMachine: - def registerResult(result: CircuitBreakerResult, acquired: AcquireResult): Unit - def updateState(): Unit - def calculateMetrics: Metrics - def state: CircuitBreakerState -// def selfRef: ActorRef[CircuitBreakerStateMachine] +private sealed trait CircuitBreakerStateMachine(val config: CircuitBreakerStateMachineConfig)(using val ox: Ox): + def calculateMetrics(lastAcquisitionResult: Option[AcquireResult]): Metrics + def updateResults(result: CircuitBreakerResult): Unit + + @volatile private var _state: CircuitBreakerState = CircuitBreakerState.Closed + + def state: CircuitBreakerState = _state + + def registerResult(result: CircuitBreakerResult, acquired: AcquireResult, selfRef: ActorRef[CircuitBreakerStateMachine]): Unit = + updateResults(result) + val oldState = _state + val newState = nextState(calculateMetrics(Some(acquired)), oldState, System.currentTimeMillis()) + _state = newState + scheduleCallback(oldState, newState, selfRef) + end registerResult + + def updateState(): Unit = + _state = nextState(calculateMetrics(None), _state, System.currentTimeMillis()) + + private def scheduleCallback( + oldState: CircuitBreakerState, + newState: CircuitBreakerState, + selfRef: ActorRef[CircuitBreakerStateMachine] + ): Unit = + (oldState, newState) match + case (CircuitBreakerState.Closed, CircuitBreakerState.Open(_)) => + // schedule switch to halfOpen after timeout + updateAfter(config.waitDurationOpenState, selfRef) + case (CircuitBreakerState.Open(_), CircuitBreakerState.HalfOpen(since, semaphore, completedOperations)) => + // schedule timeout for halfOpen state + updateAfter(config.halfOpenTimeoutDuration, selfRef) + case _ => () + + private def updateAfter(after: FiniteDuration, actorRef: ActorRef[CircuitBreakerStateMachine])(using Ox): Unit = + forkDiscard: + scheduled(ScheduledConfig(Schedule.InitialDelay(after)))(actorRef.tell(_.updateState())) + + private def nextState(metrics: Metrics, 
currentState: CircuitBreakerState, currentTimestamp: Long): CircuitBreakerState = + val lastAcquireResult = metrics.lastAcquisitionResult.filter(_.acquired) + val exceededThreshold = (metrics.failureRate >= config.failureRateThreshold || metrics.slowCallsRate >= config.slowCallThreshold) + val minCallsRecorder = metrics.operationsInWindow >= config.minimumNumberOfCalls + currentState match + case CircuitBreakerState.Closed => + if minCallsRecorder && exceededThreshold then + if config.waitDurationOpenState.toMillis != 0 then + CircuitBreakerState.HalfOpen(currentTimestamp, Semaphore(config.numberOfCallsInHalfOpenState)) + else CircuitBreakerState.Open(currentTimestamp) + else CircuitBreakerState.Closed + case CircuitBreakerState.Open(since) => + val timePassed = (currentTimestamp - since) > config.waitDurationOpenState.toMillis + if timePassed then CircuitBreakerState.HalfOpen(currentTimestamp, Semaphore(config.numberOfCallsInHalfOpenState)) + else CircuitBreakerState.Open(since) + case CircuitBreakerState.HalfOpen(since, semaphore, completedCalls) => + lazy val allCallsInHalfOpenCompleted = completedCalls >= config.numberOfCallsInHalfOpenState + lazy val timePassed = (currentTimestamp - since) > config.halfOpenTimeoutDuration.toMillis + // if we didn't complete all half open calls but timeout is reached go back to open + if !allCallsInHalfOpenCompleted && config.halfOpenTimeoutDuration.toMillis != 0 && timePassed then + CircuitBreakerState.Open(currentTimestamp) + // If halfOpen calls were completed && rates are below we close breaker + else if allCallsInHalfOpenCompleted && !exceededThreshold then CircuitBreakerState.Closed + // If halfOpen calls completed, but rates are still above go back to open + else if allCallsInHalfOpenCompleted && exceededThreshold then CircuitBreakerState.Open(currentTimestamp) + // We didn't complete all half open calls, keep halfOpen + else + lastAcquireResult match + case Some(AcquireResult(true, CircuitBreakerState.HalfOpen(s, sem, 
completed))) => + CircuitBreakerState.HalfOpen(s, sem, completed + 1) + case _ => CircuitBreakerState.HalfOpen(since, semaphore, completedCalls) + end if + end match + end nextState +end CircuitBreakerStateMachine private[resilience] object CircuitBreakerStateMachine: def apply(config: CircuitBreakerConfig)(using @@ -149,44 +211,19 @@ private[resilience] object CircuitBreakerStateMachine: end apply private[resilience] case class CircuitBreakerCountStateMachine( - config: CircuitBreakerStateMachineConfig, + stateMachineConfig: CircuitBreakerStateMachineConfig, windowSize: Int - )(using Ox) - extends CircuitBreakerStateMachine: - assert( - config.failureRateThreshold >= 0 && config.failureRateThreshold <= 100, - s"failureRateThreshold must be between 0 and 100, value: ${config.failureRateThreshold}" - ) - assert( - config.slowCallThreshold >= 0 && config.slowCallThreshold <= 100, - s"slowCallThreshold must be between 0 and 100, value: ${config.slowCallThreshold}" - ) + )(using ox: Ox) + extends CircuitBreakerStateMachine(stateMachineConfig)(using ox): private val callResults: Array[Option[CircuitBreakerResult]] = Array.fill[Option[CircuitBreakerResult]](windowSize)(None) private var writeIndex = 0 - private var _state: CircuitBreakerState = CircuitBreakerState.Closed - - def state: CircuitBreakerState = _state - - def registerResult(result: CircuitBreakerResult, acquired: AcquireResult): Unit = + def updateResults(result: CircuitBreakerResult): Unit = callResults(writeIndex) = Some(result) writeIndex = (writeIndex + 1) % windowSize - // In case of result coming from halfOpen state we update num of completed operation in this state if it didn't change - if acquired.circuitState == CircuitBreakerState.HalfOpen then - val newState = _state match - case CircuitBreakerState.HalfOpen(since, semaphore, completedOperations) => - CircuitBreakerState.HalfOpen(since, semaphore, completedOperations + 1) - case state => state - _state = newState - _state = nextState - end 
registerResult - - def updateState(): Unit = - _state = nextState - - def calculateMetrics: Metrics = + def calculateMetrics(lastAcquisitionResult: Option[AcquireResult]): Metrics = val results = callResults.flatMap(identity) val numOfOperations = results.length val failuresRate = ((results.count(_ == CircuitBreakerResult.Failure) / windowSize.toFloat) * 100).toInt @@ -194,54 +231,18 @@ private[resilience] object CircuitBreakerStateMachine: Metrics( failuresRate, slowRate, - numOfOperations + numOfOperations, + lastAcquisitionResult ) end calculateMetrics - - private def nextState: CircuitBreakerState = - val metrics = calculateMetrics - val exceededThreshold = (metrics.failureRate >= config.failureRateThreshold || metrics.slowCallsRate >= config.slowCallThreshold) - val minCallsRecorder = metrics.operationsInWindow >= config.minimumNumberOfCalls - _state match - case CircuitBreakerState.Closed => - if minCallsRecorder && exceededThreshold then - // schedule switch to halfOpen after timeout -// forkDiscard: -// scheduled(ScheduledConfig(Schedule.InitialDelay(config.waitDurationOpenState)))(selfRef.tell(_.updateState())) - CircuitBreakerState.Open(System.currentTimeMillis()) - else CircuitBreakerState.Closed - case CircuitBreakerState.Open(since) => - val timePassed = (System.currentTimeMillis() - since) > config.waitDurationOpenState.toMillis - if timePassed then CircuitBreakerState.HalfOpen(System.currentTimeMillis(), Semaphore(config.numberOfCallsInHalfOpenState)) - else CircuitBreakerState.Open(since) - case CircuitBreakerState.HalfOpen(since, semaphore, completedCalls) => - lazy val timePassed = (System.currentTimeMillis() - since) > config.halfOpenTimeoutDuration.toMillis - // if we didn't complete all half open calls but timeout is reached go back to open - if !minCallsRecorder && config.halfOpenTimeoutDuration.toMillis != 0 && timePassed then - // schedule timeout for halfOpen state -// forkDiscard: -// 
scheduled(ScheduledConfig(Schedule.InitialDelay(config.halfOpenTimeoutDuration)))(selfRef.tell(_.updateState())) - CircuitBreakerState.Open(System.currentTimeMillis()) - // If halfOpen calls were completed && rates are below we close breaker - else if minCallsRecorder && !exceededThreshold then CircuitBreakerState.Open(System.currentTimeMillis()) - // If halfOpen calls completed, but rates are still above go back to open - else if minCallsRecorder && exceededThreshold - then CircuitBreakerState.Open(System.currentTimeMillis()) - // We didn't complete all half open calls, keep halfOpen - else CircuitBreakerState.HalfOpen(since, semaphore, completedCalls) - end if - end match - end nextState end CircuitBreakerCountStateMachine private[resilience] case class CircuitBreakerTimeStateMachine( - config: CircuitBreakerStateMachineConfig, + stateMachineConfig: CircuitBreakerStateMachineConfig, windowDuration: FiniteDuration - )(using Ox) - extends CircuitBreakerStateMachine: - def registerResult(result: CircuitBreakerResult, acquired: AcquireResult): Unit = ??? - def updateState(): Unit = ??? - def calculateMetrics: Metrics = ??? - def state: CircuitBreakerState = ??? + )(using ox: Ox) + extends CircuitBreakerStateMachine(stateMachineConfig)(using ox): + def calculateMetrics(lastAcquisitionResult: Option[AcquireResult]): Metrics = ??? + def updateResults(result: CircuitBreakerResult): Unit = ??? 
end CircuitBreakerTimeStateMachine end CircuitBreakerStateMachine diff --git a/core/src/test/scala/ox/resilience/CircuitBreakerTest.scala b/core/src/test/scala/ox/resilience/CircuitBreakerTest.scala index 468a6cff..99a461c3 100644 --- a/core/src/test/scala/ox/resilience/CircuitBreakerTest.scala +++ b/core/src/test/scala/ox/resilience/CircuitBreakerTest.scala @@ -23,9 +23,9 @@ class CircuitBreakerTest extends AnyFlatSpec with Matchers: def f(): Either[String, String] = Left("boom") // when - val result1 = circuitBreaker.runEitherOrDrop(ResultPolicy.default)(f()) + val result1 = circuitBreaker.runOrDropEither(ResultPolicy.default)(f()) sleep(100.millis) // wait for state to register - val result2 = circuitBreaker.runEitherOrDrop(ResultPolicy.default)(f()) + val result2 = circuitBreaker.runOrDropEither(ResultPolicy.default)(f()) // then result1 shouldBe defined @@ -50,7 +50,7 @@ class CircuitBreakerTest extends AnyFlatSpec with Matchers: // when 0 to 50 foreach: _ => - circuitBreaker.runEitherOrDrop(ResultPolicy.default)(f()) + circuitBreaker.runOrDropEither(ResultPolicy.default)(f()) // then println(counter) From 5c94eda2c6237a002d6f6ce1a402a135f075b466 Mon Sep 17 00:00:00 2001 From: Kamil-Lontkowski <kamillont14@gmail.com> Date: Thu, 16 Jan 2025 15:58:53 +0100 Subject: [PATCH 08/26] fixes and tests for state machine --- .../scala/ox/resilience/CircuitBreaker.scala | 111 +++++++++--- .../CircuitBreakerStateMachineTest.scala | 168 ++++++++++++++++++ .../ox/resilience/CircuitBreakerTest.scala | 4 +- 3 files changed, 257 insertions(+), 26 deletions(-) create mode 100644 core/src/test/scala/ox/resilience/CircuitBreakerStateMachineTest.scala diff --git a/core/src/main/scala/ox/resilience/CircuitBreaker.scala b/core/src/main/scala/ox/resilience/CircuitBreaker.scala index 9d0c8e64..6b89479d 100644 --- a/core/src/main/scala/ox/resilience/CircuitBreaker.scala +++ b/core/src/main/scala/ox/resilience/CircuitBreaker.scala @@ -25,7 +25,8 @@ case class Metrics( failureRate: Int, 
slowCallsRate: Int, operationsInWindow: Int, - lastAcquisitionResult: Option[AcquireResult] + lastAcquisitionResult: Option[AcquireResult], + timestamp: Long ) enum SlidingWindow: @@ -37,7 +38,7 @@ case class CircuitBreakerConfig( slowCallThreshold: Int = 0, slowCallDurationThreshold: FiniteDuration = 60.seconds, slidingWindow: SlidingWindow = SlidingWindow.CountBased(100), - minimumNumberOfCalls: Int = 100, + minimumNumberOfCalls: Int = 20, waitDurationOpenState: FiniteDuration = FiniteDuration(10, TimeUnit.SECONDS), halfOpenTimeoutDuration: FiniteDuration = FiniteDuration(0, TimeUnit.MILLISECONDS), numberOfCallsInHalfOpenState: Int = 10 @@ -50,6 +51,10 @@ case class CircuitBreakerConfig( slowCallThreshold >= 0 && slowCallThreshold <= 100, s"slowCallThreshold must be between 0 and 100, value: $slowCallThreshold" ) + assert( + numberOfCallsInHalfOpenState > 0, + s"numberOfCallsInHalfOpenState must be greater than 0, value: $numberOfCallsInHalfOpenState" + ) end CircuitBreakerConfig private case class AcquireResult(acquired: Boolean, circuitState: CircuitBreakerState) @@ -81,9 +86,12 @@ class CircuitBreaker(val config: CircuitBreakerConfig)(using Ox): private val actorRef: ActorRef[CircuitBreakerStateMachine] = Actor.create(stateMachine)(using sc = BufferCapacity.apply(100)) private def tryAcquire: AcquireResult = stateMachine.state match - case CircuitBreakerState.Closed => AcquireResult(true, CircuitBreakerState.Closed) - case currState @ CircuitBreakerState.Open(_) => AcquireResult(false, currState) - case currState @ CircuitBreakerState.HalfOpen(_, semaphore, _) => AcquireResult(semaphore.tryAcquire(1), currState) + case CircuitBreakerState.Closed => AcquireResult(true, CircuitBreakerState.Closed) + case currState @ CircuitBreakerState.Open(_) => AcquireResult(false, currState) + case currState @ CircuitBreakerState.HalfOpen(_, semaphore, _) => + val a = semaphore.tryAcquire(1) + if a then println("Acquired from semaphore") + AcquireResult(a, currState) def 
runOrDropWithErrorMode[E, F[_], T](em: ErrorMode[E, F], resultPolicy: ResultPolicy[E, T] = ResultPolicy.default[E, T])( operation: => F[T] @@ -94,18 +102,14 @@ class CircuitBreaker(val config: CircuitBreakerConfig)(using Ox): val result = operation val after = System.nanoTime() val duration = (after - before).nanos - // Check result and results of policy - if em.isError(result) && resultPolicy.isWorthRetrying(em.getError(result)) then + if em.isError(result) then actorRef.tell(_.registerResult(CircuitBreakerResult.Failure, acquiredResult, actorRef)) Some(result) - else if resultPolicy.isSuccess(em.getT(result)) then + else if duration > config.slowCallDurationThreshold then actorRef.tell(_.registerResult(CircuitBreakerResult.Slow, acquiredResult, actorRef)) else actorRef.tell(_.registerResult(CircuitBreakerResult.Success, acquiredResult, actorRef)) Some(result) - else - actorRef.tell(_.registerResult(CircuitBreakerResult.Failure, acquiredResult, actorRef)) - Some(result) end if else None end if @@ -121,8 +125,9 @@ class CircuitBreaker(val config: CircuitBreakerConfig)(using Ox): end CircuitBreaker private sealed trait CircuitBreakerStateMachine(val config: CircuitBreakerStateMachineConfig)(using val ox: Ox): - def calculateMetrics(lastAcquisitionResult: Option[AcquireResult]): Metrics + def calculateMetrics(lastAcquisitionResult: Option[AcquireResult], timestamp: Long): Metrics def updateResults(result: CircuitBreakerResult): Unit + def onStateChange(oldState: CircuitBreakerState, newState: CircuitBreakerState): Unit @volatile private var _state: CircuitBreakerState = CircuitBreakerState.Closed @@ -131,13 +136,18 @@ private sealed trait CircuitBreakerStateMachine(val config: CircuitBreakerStateM def registerResult(result: CircuitBreakerResult, acquired: AcquireResult, selfRef: ActorRef[CircuitBreakerStateMachine]): Unit = updateResults(result) val oldState = _state - val newState = nextState(calculateMetrics(Some(acquired)), oldState, System.currentTimeMillis()) + 
val newState = nextState(calculateMetrics(Some(acquired), System.currentTimeMillis()), oldState) _state = newState scheduleCallback(oldState, newState, selfRef) + onStateChange(oldState, newState) end registerResult - def updateState(): Unit = - _state = nextState(calculateMetrics(None), _state, System.currentTimeMillis()) + def updateState(selfRef: ActorRef[CircuitBreakerStateMachine]): Unit = + val oldState = _state + val newState = nextState(calculateMetrics(None, System.currentTimeMillis()), oldState) + _state = newState + scheduleCallback(oldState, newState, selfRef) + onStateChange(oldState, newState) private def scheduleCallback( oldState: CircuitBreakerState, @@ -149,28 +159,30 @@ private sealed trait CircuitBreakerStateMachine(val config: CircuitBreakerStateM // schedule switch to halfOpen after timeout updateAfter(config.waitDurationOpenState, selfRef) case (CircuitBreakerState.Open(_), CircuitBreakerState.HalfOpen(since, semaphore, completedOperations)) => - // schedule timeout for halfOpen state - updateAfter(config.halfOpenTimeoutDuration, selfRef) + // schedule timeout for halfOpen state if is not 0 + if config.halfOpenTimeoutDuration.toMillis != 0 then updateAfter(config.halfOpenTimeoutDuration, selfRef) case _ => () private def updateAfter(after: FiniteDuration, actorRef: ActorRef[CircuitBreakerStateMachine])(using Ox): Unit = forkDiscard: - scheduled(ScheduledConfig(Schedule.InitialDelay(after)))(actorRef.tell(_.updateState())) + scheduled(ScheduledConfig(Schedule.InitialDelay(after)))(actorRef.tell(_.updateState(actorRef))) - private def nextState(metrics: Metrics, currentState: CircuitBreakerState, currentTimestamp: Long): CircuitBreakerState = + private[resilience] def nextState(metrics: Metrics, currentState: CircuitBreakerState): CircuitBreakerState = + val currentTimestamp = metrics.timestamp val lastAcquireResult = metrics.lastAcquisitionResult.filter(_.acquired) val exceededThreshold = (metrics.failureRate >= config.failureRateThreshold || 
metrics.slowCallsRate >= config.slowCallThreshold) val minCallsRecorder = metrics.operationsInWindow >= config.minimumNumberOfCalls currentState match case CircuitBreakerState.Closed => if minCallsRecorder && exceededThreshold then - if config.waitDurationOpenState.toMillis != 0 then + if config.waitDurationOpenState.toMillis == 0 then CircuitBreakerState.HalfOpen(currentTimestamp, Semaphore(config.numberOfCallsInHalfOpenState)) else CircuitBreakerState.Open(currentTimestamp) else CircuitBreakerState.Closed case CircuitBreakerState.Open(since) => val timePassed = (currentTimestamp - since) > config.waitDurationOpenState.toMillis - if timePassed then CircuitBreakerState.HalfOpen(currentTimestamp, Semaphore(config.numberOfCallsInHalfOpenState)) + if timePassed || config.waitDurationOpenState.toMillis == 0 then + CircuitBreakerState.HalfOpen(currentTimestamp, Semaphore(config.numberOfCallsInHalfOpenState)) else CircuitBreakerState.Open(since) case CircuitBreakerState.HalfOpen(since, semaphore, completedCalls) => lazy val allCallsInHalfOpenCompleted = completedCalls >= config.numberOfCallsInHalfOpenState @@ -219,11 +231,28 @@ private[resilience] object CircuitBreakerStateMachine: private val callResults: Array[Option[CircuitBreakerResult]] = Array.fill[Option[CircuitBreakerResult]](windowSize)(None) private var writeIndex = 0 + def onStateChange(oldState: CircuitBreakerState, newState: CircuitBreakerState): Unit = + import CircuitBreakerState.* + // we have to match so we don't reset result when for example incrementing completed calls in halfopen state + (oldState, newState) match + case (Closed, Open(_) | HalfOpen(_, _, _)) => + callResults.mapInPlace(_ => None).discard + writeIndex = 0 + case (HalfOpen(_, _, _), Open(_) | Closed) => + callResults.mapInPlace(_ => None).discard + writeIndex = 0 + case (Open(_), Closed | HalfOpen(_, _, _)) => + callResults.mapInPlace(_ => None).discard + writeIndex = 0 + case (_, _) => () + end match + end onStateChange + def 
updateResults(result: CircuitBreakerResult): Unit = callResults(writeIndex) = Some(result) writeIndex = (writeIndex + 1) % windowSize - def calculateMetrics(lastAcquisitionResult: Option[AcquireResult]): Metrics = + def calculateMetrics(lastAcquisitionResult: Option[AcquireResult], timestamp: Long): Metrics = val results = callResults.flatMap(identity) val numOfOperations = results.length val failuresRate = ((results.count(_ == CircuitBreakerResult.Failure) / windowSize.toFloat) * 100).toInt @@ -232,7 +261,8 @@ private[resilience] object CircuitBreakerStateMachine: failuresRate, slowRate, numOfOperations, - lastAcquisitionResult + lastAcquisitionResult, + timestamp ) end calculateMetrics end CircuitBreakerCountStateMachine @@ -242,7 +272,38 @@ private[resilience] object CircuitBreakerStateMachine: windowDuration: FiniteDuration )(using ox: Ox) extends CircuitBreakerStateMachine(stateMachineConfig)(using ox): - def calculateMetrics(lastAcquisitionResult: Option[AcquireResult]): Metrics = ??? - def updateResults(result: CircuitBreakerResult): Unit = ??? 
+ + // holds timestamp of recored operation and result + private val queue = collection.mutable.Queue[(Long, CircuitBreakerResult)]() + + def calculateMetrics(lastAcquisitionResult: Option[AcquireResult], timestamp: Long): Metrics = + // filter all entries that happend outside sliding window + val results = queue.filter((time, _) => timestamp > time + windowDuration.toMillis) + val numOfOperations = results.length + val failuresRate = ((results.count(_ == CircuitBreakerResult.Failure) / results.length.toFloat) * 100).toInt + val slowRate = ((results.count(_ == CircuitBreakerResult.Slow) / results.length.toFloat) * 100).toInt + Metrics( + failuresRate, + slowRate, + numOfOperations, + lastAcquisitionResult, + timestamp + ) + end calculateMetrics + def updateResults(result: CircuitBreakerResult): Unit = + queue.addOne((System.currentTimeMillis(), result)) + def onStateChange(oldState: CircuitBreakerState, newState: CircuitBreakerState): Unit = + import CircuitBreakerState.* + // we have to match so we don't reset result when for example incrementing completed calls in halfopen state + (oldState, newState) match + case (Closed, Open(_) | HalfOpen(_, _, _)) => + queue.clearAndShrink(config.minimumNumberOfCalls) + case (HalfOpen(_, _, _), Open(_) | Closed) => + queue.clearAndShrink(config.minimumNumberOfCalls) + case (Open(_), Closed | HalfOpen(_, _, _)) => + queue.clearAndShrink(config.minimumNumberOfCalls) + case (_, _) => () + end match + end onStateChange end CircuitBreakerTimeStateMachine end CircuitBreakerStateMachine diff --git a/core/src/test/scala/ox/resilience/CircuitBreakerStateMachineTest.scala b/core/src/test/scala/ox/resilience/CircuitBreakerStateMachineTest.scala new file mode 100644 index 00000000..ccfc1f8a --- /dev/null +++ b/core/src/test/scala/ox/resilience/CircuitBreakerStateMachineTest.scala @@ -0,0 +1,168 @@ +package ox.resilience + +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers +import 
scala.concurrent.duration.* +import ox.* +import java.util.concurrent.TimeUnit +import java.util.concurrent.Semaphore + +class CircuitBreakerStateMachineTest extends AnyFlatSpec with Matchers: + + behavior of "Circuit Breaker state machine" + + it should "keep closed with healthy metrics" in supervised { + // given + val config = defaultConfig + val stateMachine = CircuitBreakerStateMachine(config) + val lastResult: Option[AcquireResult] = None + val metrics = + Metrics(hundredPercentSuccessRate, hundredPercentSuccessRate, config.minimumNumberOfCalls, lastResult, System.currentTimeMillis()) + + // when + val resultingState = stateMachine.nextState(metrics, CircuitBreakerState.Closed) + + resultingState shouldBe CircuitBreakerState.Closed + } + + it should "go to open after surpasing failure threshold" in supervised { + // given + val config = defaultConfig + val stateMachine = CircuitBreakerStateMachine(config) + val lastResult: Option[AcquireResult] = None + val metrics = Metrics(badFailureRate, hundredPercentSuccessRate, config.minimumNumberOfCalls, lastResult, System.currentTimeMillis()) + + // when + val resultingState = stateMachine.nextState(metrics, CircuitBreakerState.Closed) + + // then + resultingState shouldBe a[CircuitBreakerState.Open] + } + + it should "go straight to half open after surpasing failure threshold with defined waitDurationOpenState = 0" in supervised { + // given + val config = defaultConfig.copy(waitDurationOpenState = FiniteDuration(0, TimeUnit.MILLISECONDS)) + val stateMachine = CircuitBreakerStateMachine(config) + val lastResult: Option[AcquireResult] = None + val metrics = Metrics(badFailureRate, hundredPercentSuccessRate, config.minimumNumberOfCalls, lastResult, System.currentTimeMillis()) + + // when + val resultingState = stateMachine.nextState(metrics, CircuitBreakerState.Closed) + + // then + resultingState shouldBe a[CircuitBreakerState.HalfOpen] + } + + it should "go back to open after timeout in half open passed" in 
supervised { + // given + val config = defaultConfig.copy(halfOpenTimeoutDuration = FiniteDuration(10, TimeUnit.SECONDS)) + val stateMachine = CircuitBreakerStateMachine(config) + val lastResult: Option[AcquireResult] = None + val timestamp = System.currentTimeMillis() + val metrics = Metrics( + badFailureRate, + hundredPercentSuccessRate, + config.minimumNumberOfCalls, + lastResult, + timestamp + 15.seconds.toMillis // after timeout + ) + + // when + val resultingState = + stateMachine.nextState(metrics, CircuitBreakerState.HalfOpen(timestamp, Semaphore(10), 0)) + + // then + resultingState shouldBe a[CircuitBreakerState.Open] + } + + it should "update counter of completed operations in halfopen state" in supervised { + // given + val config = defaultConfig + val stateMachine = CircuitBreakerStateMachine(config) + val completedCalls = 0 + val timestamp = System.currentTimeMillis() + val state: CircuitBreakerState.HalfOpen = CircuitBreakerState.HalfOpen(timestamp, Semaphore(10), completedCalls) + val lastResult: Option[AcquireResult] = Some(AcquireResult(true, state)) + val metrics = Metrics(badFailureRate, hundredPercentSuccessRate, config.minimumNumberOfCalls, lastResult, timestamp) + + // when + val resultingState = stateMachine.nextState(metrics, state) + + // then + resultingState shouldBe a[CircuitBreakerState.HalfOpen] + resultingState.asInstanceOf[CircuitBreakerState.HalfOpen].completedOperations shouldBe state.completedOperations + 1 + } + + it should "go back to closed after enough calls with good metrics are recorded" in supervised { + // given + val config = defaultConfig + val stateMachine = CircuitBreakerStateMachine(config) + val completedCalls = config.numberOfCallsInHalfOpenState + val timestamp = System.currentTimeMillis() + val state = CircuitBreakerState.HalfOpen(timestamp, Semaphore(0), completedCalls) + val lastResult: Option[AcquireResult] = Some(AcquireResult(true, state)) + val metrics = Metrics(hundredPercentSuccessRate, 
hundredPercentSuccessRate, config.numberOfCallsInHalfOpenState, lastResult, timestamp) + + // when + val resultingState = stateMachine.nextState(metrics, state) + + // then + resultingState shouldBe CircuitBreakerState.Closed + } + + it should "go to open after enough calls with bad metrics are recorded" in supervised { + // given + val config = defaultConfig + val stateMachine = CircuitBreakerStateMachine(config) + val completedCalls = config.numberOfCallsInHalfOpenState + val timestamp = System.currentTimeMillis() + val state = CircuitBreakerState.HalfOpen(timestamp, Semaphore(0), completedCalls) + val lastResult: Option[AcquireResult] = Some(AcquireResult(true, state)) + val metrics = Metrics(badFailureRate, hundredPercentSuccessRate, config.numberOfCallsInHalfOpenState, lastResult, timestamp) + + // when + val resultingState = stateMachine.nextState(metrics, state) + + // then + resultingState shouldBe a[CircuitBreakerState.Open] + } + + it should "go to half open after waitDurationOpenState passes" in supervised { + // given + val config = defaultConfig + val stateMachine = CircuitBreakerStateMachine(config) + val currentTimestamp = System.currentTimeMillis() + val state = CircuitBreakerState.Open(currentTimestamp) + val lastResult: Option[AcquireResult] = None + val metrics = + Metrics( + badFailureRate, + hundredPercentSuccessRate, + config.minimumNumberOfCalls, + lastResult, + currentTimestamp + 15.seconds.toMillis // after wait time + ) + + // when + val resultingState = stateMachine.nextState(metrics, state) + + // then + resultingState shouldBe a[CircuitBreakerState.HalfOpen] + } + + private val defaultConfig: CircuitBreakerConfig = + CircuitBreakerConfig( + failureRateThreshold = 50, + slowCallThreshold = 50, + slowCallDurationThreshold = 60.seconds, + slidingWindow = SlidingWindow.CountBased(100), + minimumNumberOfCalls = 20, + waitDurationOpenState = FiniteDuration(10, java.util.concurrent.TimeUnit.SECONDS), + halfOpenTimeoutDuration = 
FiniteDuration(0, TimeUnit.MILLISECONDS), + numberOfCallsInHalfOpenState = 10 + ) + + private val hundredPercentSuccessRate = 0 + private val badFailureRate = 100 + +end CircuitBreakerStateMachineTest diff --git a/core/src/test/scala/ox/resilience/CircuitBreakerTest.scala b/core/src/test/scala/ox/resilience/CircuitBreakerTest.scala index 99a461c3..8b9cd47b 100644 --- a/core/src/test/scala/ox/resilience/CircuitBreakerTest.scala +++ b/core/src/test/scala/ox/resilience/CircuitBreakerTest.scala @@ -16,7 +16,8 @@ class CircuitBreakerTest extends AnyFlatSpec with Matchers: CircuitBreakerConfig( failureRateThreshold = thresholdRate, minimumNumberOfCalls = numberOfOperations, - slidingWindow = SlidingWindow.CountBased(numberOfOperations) + slidingWindow = SlidingWindow.CountBased(numberOfOperations), + numberOfCallsInHalfOpenState = 0 ) ) @@ -45,6 +46,7 @@ class CircuitBreakerTest extends AnyFlatSpec with Matchers: ) var counter = 0 def f(): Either[String, String] = + sleep(100.millis) counter += 1 Left("boom") From 0a26e2f4c77f611adb91e5354d108c9a817a3c22 Mon Sep 17 00:00:00 2001 From: Kamil-Lontkowski <kamillont14@gmail.com> Date: Thu, 16 Jan 2025 16:27:39 +0100 Subject: [PATCH 09/26] scaladoc, move logic to different files --- .../scala/ox/resilience/CircuitBreaker.scala | 276 +++--------------- .../ox/resilience/CircuitBreakerConfig.scala | 62 ++++ .../CircuitBreakerStateMachine.scala | 192 ++++++++++++ .../ox/resilience/CircuitBreakerTest.scala | 31 +- 4 files changed, 301 insertions(+), 260 deletions(-) create mode 100644 core/src/main/scala/ox/resilience/CircuitBreakerConfig.scala create mode 100644 core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala diff --git a/core/src/main/scala/ox/resilience/CircuitBreaker.scala b/core/src/main/scala/ox/resilience/CircuitBreaker.scala index 6b89479d..b40963a5 100644 --- a/core/src/main/scala/ox/resilience/CircuitBreaker.scala +++ b/core/src/main/scala/ox/resilience/CircuitBreaker.scala @@ -2,26 +2,23 @@ package 
ox.resilience import scala.concurrent.duration.* import ox.* -import java.util.concurrent.TimeUnit -import ox.scheduling.scheduled -import ox.scheduling.{ScheduledConfig, Schedule} import java.util.concurrent.Semaphore import ox.channels.Actor import ox.channels.BufferCapacity import ox.channels.ActorRef import scala.util.Try -enum CircuitBreakerState: +private[resilience] enum CircuitBreakerState: case Open(since: Long) case Closed case HalfOpen(since: Long, semaphore: Semaphore, completedOperations: Int = 0) -enum CircuitBreakerResult: +private[resilience] enum CircuitBreakerResult: case Success case Failure case Slow -case class Metrics( +private[resilience] case class Metrics( failureRate: Int, slowCallsRate: Int, operationsInWindow: Int, @@ -29,35 +26,7 @@ case class Metrics( timestamp: Long ) -enum SlidingWindow: - case CountBased(windowSize: Int) - case TimeBased(duration: FiniteDuration) - -case class CircuitBreakerConfig( - failureRateThreshold: Int = 50, - slowCallThreshold: Int = 0, - slowCallDurationThreshold: FiniteDuration = 60.seconds, - slidingWindow: SlidingWindow = SlidingWindow.CountBased(100), - minimumNumberOfCalls: Int = 20, - waitDurationOpenState: FiniteDuration = FiniteDuration(10, TimeUnit.SECONDS), - halfOpenTimeoutDuration: FiniteDuration = FiniteDuration(0, TimeUnit.MILLISECONDS), - numberOfCallsInHalfOpenState: Int = 10 -): - assert( - failureRateThreshold >= 0 && failureRateThreshold <= 100, - s"failureRateThreshold must be between 0 and 100, value: $failureRateThreshold" - ) - assert( - slowCallThreshold >= 0 && slowCallThreshold <= 100, - s"slowCallThreshold must be between 0 and 100, value: $slowCallThreshold" - ) - assert( - numberOfCallsInHalfOpenState > 0, - s"numberOfCallsInHalfOpenState must be greater than 0, value: $numberOfCallsInHalfOpenState" - ) -end CircuitBreakerConfig - -private case class AcquireResult(acquired: Boolean, circuitState: CircuitBreakerState) +private[resilience] case class AcquireResult(acquired: 
Boolean, circuitState: CircuitBreakerState) private case class CircuitBreakerStateMachineConfig( failureRateThreshold: Int, @@ -81,19 +50,27 @@ private object CircuitBreakerStateMachineConfig: ) end CircuitBreakerStateMachineConfig +/** Circuit Breaker. Operations can be dropped, when the breaker is open or if it doesn't take more operation in halfOpen state. The Circuit + * Breaker might calculate different metrics based on [[SlidingWindow]] provided in config. See [[SlidingWindow]] for more details. + */ class CircuitBreaker(val config: CircuitBreakerConfig)(using Ox): val stateMachine = CircuitBreakerStateMachine(config) private val actorRef: ActorRef[CircuitBreakerStateMachine] = Actor.create(stateMachine)(using sc = BufferCapacity.apply(100)) private def tryAcquire: AcquireResult = stateMachine.state match - case CircuitBreakerState.Closed => AcquireResult(true, CircuitBreakerState.Closed) - case currState @ CircuitBreakerState.Open(_) => AcquireResult(false, currState) - case currState @ CircuitBreakerState.HalfOpen(_, semaphore, _) => - val a = semaphore.tryAcquire(1) - if a then println("Acquired from semaphore") - AcquireResult(a, currState) - - def runOrDropWithErrorMode[E, F[_], T](em: ErrorMode[E, F], resultPolicy: ResultPolicy[E, T] = ResultPolicy.default[E, T])( + case CircuitBreakerState.Closed => AcquireResult(true, CircuitBreakerState.Closed) + case currState @ CircuitBreakerState.Open(_) => AcquireResult(false, currState) + case currState @ CircuitBreakerState.HalfOpen(_, semaphore, _) => AcquireResult(semaphore.tryAcquire(1), currState) + + /** Runs the operation using the given error mode or drops it if the breaker is open. + * @param em + * The error mode to use, which specifies when a result value is considered success, and when a failure. + * @param operation + * The operation to run. + * @return + * `Some` if the operation has been run, `None` if the operation has been dropped. 
+ */ + def runOrDropWithErrorMode[E, F[_], T](em: ErrorMode[E, F])( operation: => F[T] ): Option[F[T]] = val acquiredResult = tryAcquire @@ -115,195 +92,30 @@ class CircuitBreaker(val config: CircuitBreakerConfig)(using Ox): end if end runOrDropWithErrorMode - def runOrDropEither[E, T](resultPolicy: ResultPolicy[E, T] = ResultPolicy.default[E, T])( + /** Runs the operation returning [[scala.util.Either]] or drops it if the breaker is open. Note that any exceptions thrown by the + * operation aren't caught and are propagated to user. + * + * @param operation + * The operation to run. + * @return + * `Some` if the operation has been run, `None` if the operation has been dropped. + * @throws anything + * The exception thrown by operation. + */ + def runOrDropEither[E, T]( operation: => Either[E, T] ): Option[Either[E, T]] = - runOrDropWithErrorMode(EitherMode[E], resultPolicy)(operation) - - def runOrDrop[T](resultPolicy: ResultPolicy[Throwable, T] = ResultPolicy.default[Throwable, T])(operation: => T): Option[T] = - runOrDropEither(resultPolicy)(Try(operation).toEither).map(_.fold(throw _, identity)) + runOrDropWithErrorMode(EitherMode[E])(operation) + + /** Runs the operation or drops it if the breaker is open returning a direct result wrapped in [[Option]] + * + * @param operation + * The operation to run. + * @return + * `Some` if the operation has been run, `None` if the operation has been dropped. + * @throws anything + * The exception thrown by operation. 
+ */ + def runOrDrop[T](operation: => T): Option[T] = + runOrDropEither(Try(operation).toEither).map(_.fold(throw _, identity)) end CircuitBreaker - -private sealed trait CircuitBreakerStateMachine(val config: CircuitBreakerStateMachineConfig)(using val ox: Ox): - def calculateMetrics(lastAcquisitionResult: Option[AcquireResult], timestamp: Long): Metrics - def updateResults(result: CircuitBreakerResult): Unit - def onStateChange(oldState: CircuitBreakerState, newState: CircuitBreakerState): Unit - - @volatile private var _state: CircuitBreakerState = CircuitBreakerState.Closed - - def state: CircuitBreakerState = _state - - def registerResult(result: CircuitBreakerResult, acquired: AcquireResult, selfRef: ActorRef[CircuitBreakerStateMachine]): Unit = - updateResults(result) - val oldState = _state - val newState = nextState(calculateMetrics(Some(acquired), System.currentTimeMillis()), oldState) - _state = newState - scheduleCallback(oldState, newState, selfRef) - onStateChange(oldState, newState) - end registerResult - - def updateState(selfRef: ActorRef[CircuitBreakerStateMachine]): Unit = - val oldState = _state - val newState = nextState(calculateMetrics(None, System.currentTimeMillis()), oldState) - _state = newState - scheduleCallback(oldState, newState, selfRef) - onStateChange(oldState, newState) - - private def scheduleCallback( - oldState: CircuitBreakerState, - newState: CircuitBreakerState, - selfRef: ActorRef[CircuitBreakerStateMachine] - ): Unit = - (oldState, newState) match - case (CircuitBreakerState.Closed, CircuitBreakerState.Open(_)) => - // schedule switch to halfOpen after timeout - updateAfter(config.waitDurationOpenState, selfRef) - case (CircuitBreakerState.Open(_), CircuitBreakerState.HalfOpen(since, semaphore, completedOperations)) => - // schedule timeout for halfOpen state if is not 0 - if config.halfOpenTimeoutDuration.toMillis != 0 then updateAfter(config.halfOpenTimeoutDuration, selfRef) - case _ => () - - private def 
updateAfter(after: FiniteDuration, actorRef: ActorRef[CircuitBreakerStateMachine])(using Ox): Unit = - forkDiscard: - scheduled(ScheduledConfig(Schedule.InitialDelay(after)))(actorRef.tell(_.updateState(actorRef))) - - private[resilience] def nextState(metrics: Metrics, currentState: CircuitBreakerState): CircuitBreakerState = - val currentTimestamp = metrics.timestamp - val lastAcquireResult = metrics.lastAcquisitionResult.filter(_.acquired) - val exceededThreshold = (metrics.failureRate >= config.failureRateThreshold || metrics.slowCallsRate >= config.slowCallThreshold) - val minCallsRecorder = metrics.operationsInWindow >= config.minimumNumberOfCalls - currentState match - case CircuitBreakerState.Closed => - if minCallsRecorder && exceededThreshold then - if config.waitDurationOpenState.toMillis == 0 then - CircuitBreakerState.HalfOpen(currentTimestamp, Semaphore(config.numberOfCallsInHalfOpenState)) - else CircuitBreakerState.Open(currentTimestamp) - else CircuitBreakerState.Closed - case CircuitBreakerState.Open(since) => - val timePassed = (currentTimestamp - since) > config.waitDurationOpenState.toMillis - if timePassed || config.waitDurationOpenState.toMillis == 0 then - CircuitBreakerState.HalfOpen(currentTimestamp, Semaphore(config.numberOfCallsInHalfOpenState)) - else CircuitBreakerState.Open(since) - case CircuitBreakerState.HalfOpen(since, semaphore, completedCalls) => - lazy val allCallsInHalfOpenCompleted = completedCalls >= config.numberOfCallsInHalfOpenState - lazy val timePassed = (currentTimestamp - since) > config.halfOpenTimeoutDuration.toMillis - // if we didn't complete all half open calls but timeout is reached go back to open - if !allCallsInHalfOpenCompleted && config.halfOpenTimeoutDuration.toMillis != 0 && timePassed then - CircuitBreakerState.Open(currentTimestamp) - // If halfOpen calls were completed && rates are below we close breaker - else if allCallsInHalfOpenCompleted && !exceededThreshold then CircuitBreakerState.Closed - // If 
halfOpen calls completed, but rates are still above go back to open - else if allCallsInHalfOpenCompleted && exceededThreshold then CircuitBreakerState.Open(currentTimestamp) - // We didn't complete all half open calls, keep halfOpen - else - lastAcquireResult match - case Some(AcquireResult(true, CircuitBreakerState.HalfOpen(s, sem, completed))) => - CircuitBreakerState.HalfOpen(s, sem, completed + 1) - case _ => CircuitBreakerState.HalfOpen(since, semaphore, completedCalls) - end if - end match - end nextState -end CircuitBreakerStateMachine - -private[resilience] object CircuitBreakerStateMachine: - def apply(config: CircuitBreakerConfig)(using - Ox - ): CircuitBreakerStateMachine = - config.slidingWindow match - case SlidingWindow.CountBased(size) => - CircuitBreakerCountStateMachine( - CircuitBreakerStateMachineConfig.fromConfig(config), - size - ) - case SlidingWindow.TimeBased(duration) => - CircuitBreakerTimeStateMachine( - CircuitBreakerStateMachineConfig.fromConfig(config), - duration - ) - end apply - - private[resilience] case class CircuitBreakerCountStateMachine( - stateMachineConfig: CircuitBreakerStateMachineConfig, - windowSize: Int - )(using ox: Ox) - extends CircuitBreakerStateMachine(stateMachineConfig)(using ox): - - private val callResults: Array[Option[CircuitBreakerResult]] = Array.fill[Option[CircuitBreakerResult]](windowSize)(None) - private var writeIndex = 0 - - def onStateChange(oldState: CircuitBreakerState, newState: CircuitBreakerState): Unit = - import CircuitBreakerState.* - // we have to match so we don't reset result when for example incrementing completed calls in halfopen state - (oldState, newState) match - case (Closed, Open(_) | HalfOpen(_, _, _)) => - callResults.mapInPlace(_ => None).discard - writeIndex = 0 - case (HalfOpen(_, _, _), Open(_) | Closed) => - callResults.mapInPlace(_ => None).discard - writeIndex = 0 - case (Open(_), Closed | HalfOpen(_, _, _)) => - callResults.mapInPlace(_ => None).discard - writeIndex = 0 
- case (_, _) => () - end match - end onStateChange - - def updateResults(result: CircuitBreakerResult): Unit = - callResults(writeIndex) = Some(result) - writeIndex = (writeIndex + 1) % windowSize - - def calculateMetrics(lastAcquisitionResult: Option[AcquireResult], timestamp: Long): Metrics = - val results = callResults.flatMap(identity) - val numOfOperations = results.length - val failuresRate = ((results.count(_ == CircuitBreakerResult.Failure) / windowSize.toFloat) * 100).toInt - val slowRate = ((results.count(_ == CircuitBreakerResult.Slow) / windowSize.toFloat) * 100).toInt - Metrics( - failuresRate, - slowRate, - numOfOperations, - lastAcquisitionResult, - timestamp - ) - end calculateMetrics - end CircuitBreakerCountStateMachine - - private[resilience] case class CircuitBreakerTimeStateMachine( - stateMachineConfig: CircuitBreakerStateMachineConfig, - windowDuration: FiniteDuration - )(using ox: Ox) - extends CircuitBreakerStateMachine(stateMachineConfig)(using ox): - - // holds timestamp of recored operation and result - private val queue = collection.mutable.Queue[(Long, CircuitBreakerResult)]() - - def calculateMetrics(lastAcquisitionResult: Option[AcquireResult], timestamp: Long): Metrics = - // filter all entries that happend outside sliding window - val results = queue.filter((time, _) => timestamp > time + windowDuration.toMillis) - val numOfOperations = results.length - val failuresRate = ((results.count(_ == CircuitBreakerResult.Failure) / results.length.toFloat) * 100).toInt - val slowRate = ((results.count(_ == CircuitBreakerResult.Slow) / results.length.toFloat) * 100).toInt - Metrics( - failuresRate, - slowRate, - numOfOperations, - lastAcquisitionResult, - timestamp - ) - end calculateMetrics - def updateResults(result: CircuitBreakerResult): Unit = - queue.addOne((System.currentTimeMillis(), result)) - def onStateChange(oldState: CircuitBreakerState, newState: CircuitBreakerState): Unit = - import CircuitBreakerState.* - // we have to match 
so we don't reset result when for example incrementing completed calls in halfopen state - (oldState, newState) match - case (Closed, Open(_) | HalfOpen(_, _, _)) => - queue.clearAndShrink(config.minimumNumberOfCalls) - case (HalfOpen(_, _, _), Open(_) | Closed) => - queue.clearAndShrink(config.minimumNumberOfCalls) - case (Open(_), Closed | HalfOpen(_, _, _)) => - queue.clearAndShrink(config.minimumNumberOfCalls) - case (_, _) => () - end match - end onStateChange - end CircuitBreakerTimeStateMachine -end CircuitBreakerStateMachine diff --git a/core/src/main/scala/ox/resilience/CircuitBreakerConfig.scala b/core/src/main/scala/ox/resilience/CircuitBreakerConfig.scala new file mode 100644 index 00000000..228afa5f --- /dev/null +++ b/core/src/main/scala/ox/resilience/CircuitBreakerConfig.scala @@ -0,0 +1,62 @@ +package ox.resilience + +import scala.concurrent.duration.* +import java.util.concurrent.TimeUnit + +/** Allows to configure how [[Metrics]] will be calculated + */ +enum SlidingWindow: + /** Window counting last n operations when calculating metrics. + * @param windowSize + * number of last n results recored. + */ + case CountBased(windowSize: Int) + + /** Window counting operations in the lapse of `duraiton` before current time. + * @param duration + * span of time where results are considered for including in metrics. + */ + case TimeBased(duration: FiniteDuration) +end SlidingWindow + +/** @param failureRateThreshold + * threshold, as percentage of operations that ended in failure + * @param slowCallThreshold + * threshold, as percentage of operations that spanned more then [[slowCallDurationThreshold]]. + * @param slowCallDurationThreshold + * time after which operation is considered slow. + * @param slidingWindow + * configures how thresholds will be calculated. See [[SlidingWindow]] for more details. + * @param minimumNumberOfCalls + * minimum number of results that must be registered before metrics are calculated. 
+ * @param waitDurationOpenState + * how much time will pass before breaker will switch from open to half open state. + * @param halfOpenTimeoutDuration + * time out after which, if not enough calls where registered in half open state, breaker will go back to open state. + * @param numberOfCallsInHalfOpenState + * number of results that must be registered to calculate metrics and decide if breaker should go back to open state or close. This is + * also maximum number of operations that can be started in half open state. + */ +case class CircuitBreakerConfig( + failureRateThreshold: Int = 50, + slowCallThreshold: Int = 0, + slowCallDurationThreshold: FiniteDuration = 60.seconds, + slidingWindow: SlidingWindow = SlidingWindow.CountBased(100), + minimumNumberOfCalls: Int = 20, + waitDurationOpenState: FiniteDuration = FiniteDuration(10, TimeUnit.SECONDS), + halfOpenTimeoutDuration: FiniteDuration = FiniteDuration(0, TimeUnit.MILLISECONDS), + numberOfCallsInHalfOpenState: Int = 10 +): + assert( + failureRateThreshold >= 0 && failureRateThreshold <= 100, + s"failureRateThreshold must be between 0 and 100, value: $failureRateThreshold" + ) + assert( + slowCallThreshold >= 0 && slowCallThreshold <= 100, + s"slowCallThreshold must be between 0 and 100, value: $slowCallThreshold" + ) + assert( + numberOfCallsInHalfOpenState > 0, + s"numberOfCallsInHalfOpenState must be greater than 0, value: $numberOfCallsInHalfOpenState" + ) +end CircuitBreakerConfig diff --git a/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala b/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala new file mode 100644 index 00000000..e6e20de9 --- /dev/null +++ b/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala @@ -0,0 +1,192 @@ +package ox.resilience + +import scala.concurrent.duration.* +import ox.* +import ox.scheduling.scheduled +import ox.scheduling.{ScheduledConfig, Schedule} +import java.util.concurrent.Semaphore +import ox.channels.ActorRef + 
+private sealed trait CircuitBreakerStateMachine(val config: CircuitBreakerStateMachineConfig)(using val ox: Ox): + def calculateMetrics(lastAcquisitionResult: Option[AcquireResult], timestamp: Long): Metrics + def updateResults(result: CircuitBreakerResult): Unit + def onStateChange(oldState: CircuitBreakerState, newState: CircuitBreakerState): Unit + + @volatile private var _state: CircuitBreakerState = CircuitBreakerState.Closed + + def state: CircuitBreakerState = _state + + def registerResult(result: CircuitBreakerResult, acquired: AcquireResult, selfRef: ActorRef[CircuitBreakerStateMachine]): Unit = + updateResults(result) + val oldState = _state + val newState = nextState(calculateMetrics(Some(acquired), System.currentTimeMillis()), oldState) + _state = newState + scheduleCallback(oldState, newState, selfRef) + onStateChange(oldState, newState) + end registerResult + + def updateState(selfRef: ActorRef[CircuitBreakerStateMachine]): Unit = + val oldState = _state + val newState = nextState(calculateMetrics(None, System.currentTimeMillis()), oldState) + _state = newState + scheduleCallback(oldState, newState, selfRef) + onStateChange(oldState, newState) + + private def scheduleCallback( + oldState: CircuitBreakerState, + newState: CircuitBreakerState, + selfRef: ActorRef[CircuitBreakerStateMachine] + ): Unit = + (oldState, newState) match + case (CircuitBreakerState.Closed, CircuitBreakerState.Open(_)) => + // schedule switch to halfOpen after timeout + updateAfter(config.waitDurationOpenState, selfRef) + case (CircuitBreakerState.Open(_), CircuitBreakerState.HalfOpen(since, semaphore, completedOperations)) => + // schedule timeout for halfOpen state if is not 0 + if config.halfOpenTimeoutDuration.toMillis != 0 then updateAfter(config.halfOpenTimeoutDuration, selfRef) + case _ => () + + private def updateAfter(after: FiniteDuration, actorRef: ActorRef[CircuitBreakerStateMachine])(using Ox): Unit = + forkDiscard: + 
scheduled(ScheduledConfig(Schedule.InitialDelay(after)))(actorRef.tell(_.updateState(actorRef))) + + private[resilience] def nextState(metrics: Metrics, currentState: CircuitBreakerState): CircuitBreakerState = + val currentTimestamp = metrics.timestamp + val lastAcquireResult = metrics.lastAcquisitionResult.filter(_.acquired) + val exceededThreshold = (metrics.failureRate >= config.failureRateThreshold || metrics.slowCallsRate >= config.slowCallThreshold) + val minCallsRecorder = metrics.operationsInWindow >= config.minimumNumberOfCalls + currentState match + case CircuitBreakerState.Closed => + if minCallsRecorder && exceededThreshold then + if config.waitDurationOpenState.toMillis == 0 then + CircuitBreakerState.HalfOpen(currentTimestamp, Semaphore(config.numberOfCallsInHalfOpenState)) + else CircuitBreakerState.Open(currentTimestamp) + else CircuitBreakerState.Closed + case CircuitBreakerState.Open(since) => + val timePassed = (currentTimestamp - since) > config.waitDurationOpenState.toMillis + if timePassed || config.waitDurationOpenState.toMillis == 0 then + CircuitBreakerState.HalfOpen(currentTimestamp, Semaphore(config.numberOfCallsInHalfOpenState)) + else CircuitBreakerState.Open(since) + case CircuitBreakerState.HalfOpen(since, semaphore, completedCalls) => + lazy val allCallsInHalfOpenCompleted = completedCalls >= config.numberOfCallsInHalfOpenState + lazy val timePassed = (currentTimestamp - since) > config.halfOpenTimeoutDuration.toMillis + // if we didn't complete all half open calls but timeout is reached go back to open + if !allCallsInHalfOpenCompleted && config.halfOpenTimeoutDuration.toMillis != 0 && timePassed then + CircuitBreakerState.Open(currentTimestamp) + // If halfOpen calls were completed && rates are below we close breaker + else if allCallsInHalfOpenCompleted && !exceededThreshold then CircuitBreakerState.Closed + // If halfOpen calls completed, but rates are still above go back to open + else if allCallsInHalfOpenCompleted && 
exceededThreshold then CircuitBreakerState.Open(currentTimestamp) + // We didn't complete all half open calls, keep halfOpen + else + lastAcquireResult match + case Some(AcquireResult(true, CircuitBreakerState.HalfOpen(s, sem, completed))) => + CircuitBreakerState.HalfOpen(s, sem, completed + 1) + case _ => CircuitBreakerState.HalfOpen(since, semaphore, completedCalls) + end if + end match + end nextState +end CircuitBreakerStateMachine + +private[resilience] object CircuitBreakerStateMachine: + def apply(config: CircuitBreakerConfig)(using + Ox + ): CircuitBreakerStateMachine = + config.slidingWindow match + case SlidingWindow.CountBased(size) => + CircuitBreakerCountStateMachine( + CircuitBreakerStateMachineConfig.fromConfig(config), + size + ) + case SlidingWindow.TimeBased(duration) => + CircuitBreakerTimeStateMachine( + CircuitBreakerStateMachineConfig.fromConfig(config), + duration + ) + end apply + + private[resilience] case class CircuitBreakerCountStateMachine( + stateMachineConfig: CircuitBreakerStateMachineConfig, + windowSize: Int + )(using ox: Ox) + extends CircuitBreakerStateMachine(stateMachineConfig)(using ox): + + private val callResults: Array[Option[CircuitBreakerResult]] = Array.fill[Option[CircuitBreakerResult]](windowSize)(None) + private var writeIndex = 0 + + def onStateChange(oldState: CircuitBreakerState, newState: CircuitBreakerState): Unit = + import CircuitBreakerState.* + // we have to match so we don't reset result when for example incrementing completed calls in halfopen state + (oldState, newState) match + case (Closed, Open(_) | HalfOpen(_, _, _)) => + callResults.mapInPlace(_ => None).discard + writeIndex = 0 + case (HalfOpen(_, _, _), Open(_) | Closed) => + callResults.mapInPlace(_ => None).discard + writeIndex = 0 + case (Open(_), Closed | HalfOpen(_, _, _)) => + callResults.mapInPlace(_ => None).discard + writeIndex = 0 + case (_, _) => () + end match + end onStateChange + + def updateResults(result: CircuitBreakerResult): Unit 
= + callResults(writeIndex) = Some(result) + writeIndex = (writeIndex + 1) % windowSize + + def calculateMetrics(lastAcquisitionResult: Option[AcquireResult], timestamp: Long): Metrics = + val results = callResults.flatMap(identity) + val numOfOperations = results.length + val failuresRate = ((results.count(_ == CircuitBreakerResult.Failure) / windowSize.toFloat) * 100).toInt + val slowRate = ((results.count(_ == CircuitBreakerResult.Slow) / windowSize.toFloat) * 100).toInt + Metrics( + failuresRate, + slowRate, + numOfOperations, + lastAcquisitionResult, + timestamp + ) + end calculateMetrics + end CircuitBreakerCountStateMachine + + private[resilience] case class CircuitBreakerTimeStateMachine( + stateMachineConfig: CircuitBreakerStateMachineConfig, + windowDuration: FiniteDuration + )(using ox: Ox) + extends CircuitBreakerStateMachine(stateMachineConfig)(using ox): + + // holds timestamp of recored operation and result + private val queue = collection.mutable.Queue[(Long, CircuitBreakerResult)]() + + def calculateMetrics(lastAcquisitionResult: Option[AcquireResult], timestamp: Long): Metrics = + // filter all entries that happend outside sliding window + val results = queue.filter((time, _) => timestamp > time + windowDuration.toMillis) + val numOfOperations = results.length + val failuresRate = ((results.count(_ == CircuitBreakerResult.Failure) / results.length.toFloat) * 100).toInt + val slowRate = ((results.count(_ == CircuitBreakerResult.Slow) / results.length.toFloat) * 100).toInt + Metrics( + failuresRate, + slowRate, + numOfOperations, + lastAcquisitionResult, + timestamp + ) + end calculateMetrics + def updateResults(result: CircuitBreakerResult): Unit = + queue.addOne((System.currentTimeMillis(), result)) + def onStateChange(oldState: CircuitBreakerState, newState: CircuitBreakerState): Unit = + import CircuitBreakerState.* + // we have to match so we don't reset result when for example incrementing completed calls in halfopen state + (oldState, 
newState) match + case (Closed, Open(_) | HalfOpen(_, _, _)) => + queue.clearAndShrink(config.minimumNumberOfCalls) + case (HalfOpen(_, _, _), Open(_) | Closed) => + queue.clearAndShrink(config.minimumNumberOfCalls) + case (Open(_), Closed | HalfOpen(_, _, _)) => + queue.clearAndShrink(config.minimumNumberOfCalls) + case (_, _) => () + end match + end onStateChange + end CircuitBreakerTimeStateMachine +end CircuitBreakerStateMachine diff --git a/core/src/test/scala/ox/resilience/CircuitBreakerTest.scala b/core/src/test/scala/ox/resilience/CircuitBreakerTest.scala index 8b9cd47b..405739e3 100644 --- a/core/src/test/scala/ox/resilience/CircuitBreakerTest.scala +++ b/core/src/test/scala/ox/resilience/CircuitBreakerTest.scala @@ -17,45 +17,20 @@ class CircuitBreakerTest extends AnyFlatSpec with Matchers: failureRateThreshold = thresholdRate, minimumNumberOfCalls = numberOfOperations, slidingWindow = SlidingWindow.CountBased(numberOfOperations), - numberOfCallsInHalfOpenState = 0 + numberOfCallsInHalfOpenState = 1 ) ) def f(): Either[String, String] = Left("boom") // when - val result1 = circuitBreaker.runOrDropEither(ResultPolicy.default)(f()) + val result1 = circuitBreaker.runOrDropEither(f()) sleep(100.millis) // wait for state to register - val result2 = circuitBreaker.runOrDropEither(ResultPolicy.default)(f()) + val result2 = circuitBreaker.runOrDropEither(f()) // then result1 shouldBe defined result2 shouldBe empty } - it should "run" in supervised { - // given - val thresholdRate = 100 - val numberOfOperations = 10 - val circuitBreaker = CircuitBreaker( - CircuitBreakerConfig( - failureRateThreshold = thresholdRate, - minimumNumberOfCalls = numberOfOperations, - slidingWindow = SlidingWindow.CountBased(numberOfOperations) - ) - ) - var counter = 0 - def f(): Either[String, String] = - sleep(100.millis) - counter += 1 - Left("boom") - - // when - 0 to 50 foreach: _ => - circuitBreaker.runOrDropEither(ResultPolicy.default)(f()) - - // then - println(counter) - } - 
end CircuitBreakerTest From 20ed3ff4c671fb3309e5ec5c0873ef1caa47d1dc Mon Sep 17 00:00:00 2001 From: Kamil-Lontkowski <kamillont14@gmail.com> Date: Mon, 20 Jan 2025 12:53:07 +0100 Subject: [PATCH 10/26] Refactor nextState to object, use composition instead of inheritance --- .../ox/resilience/AtomicCircularBuffer.scala | 29 ---- .../scala/ox/resilience/CircuitBreaker.scala | 7 +- .../CircuitBreakerStateMachine.scala | 142 +++++++++--------- core/src/main/scala/ox/util.scala | 10 +- .../CircuitBreakerStateMachineTest.scala | 16 +- 5 files changed, 87 insertions(+), 117 deletions(-) delete mode 100644 core/src/main/scala/ox/resilience/AtomicCircularBuffer.scala diff --git a/core/src/main/scala/ox/resilience/AtomicCircularBuffer.scala b/core/src/main/scala/ox/resilience/AtomicCircularBuffer.scala deleted file mode 100644 index 40b975e9..00000000 --- a/core/src/main/scala/ox/resilience/AtomicCircularBuffer.scala +++ /dev/null @@ -1,29 +0,0 @@ -package ox.resilience - -import scala.reflect.ClassTag -import java.util.concurrent.Semaphore - -class AtomicCircularBuffer[T: ClassTag](size: Int): - private val semaphore = Semaphore(1) - private var writeIndex = 0 - private var readIndex = 0 - private val buffer = Array.fill[Option[T]](size)(None) - def push(item: T): Unit = - semaphore.acquire() - try - buffer(writeIndex) = Some(item) - writeIndex = (writeIndex + 1) % size - finally semaphore.release() - def pop: Option[T] = - semaphore.acquire() - try - val result = buffer(readIndex) - readIndex = (readIndex + 1) % size - result - finally semaphore.release() - def peak: Option[T] = buffer(readIndex) - def snapshot: Array[T] = - semaphore.acquire() - try buffer.clone().flatMap(identity) - finally semaphore.release() -end AtomicCircularBuffer diff --git a/core/src/main/scala/ox/resilience/CircuitBreaker.scala b/core/src/main/scala/ox/resilience/CircuitBreaker.scala index b40963a5..8e1b85df 100644 --- a/core/src/main/scala/ox/resilience/CircuitBreaker.scala +++ 
b/core/src/main/scala/ox/resilience/CircuitBreaker.scala @@ -54,7 +54,7 @@ end CircuitBreakerStateMachineConfig * Breaker might calculate different metrics based on [[SlidingWindow]] provided in config. See [[SlidingWindow]] for more details. */ class CircuitBreaker(val config: CircuitBreakerConfig)(using Ox): - val stateMachine = CircuitBreakerStateMachine(config) + private val stateMachine = CircuitBreakerStateMachine(config) private val actorRef: ActorRef[CircuitBreakerStateMachine] = Actor.create(stateMachine)(using sc = BufferCapacity.apply(100)) private def tryAcquire: AcquireResult = stateMachine.state match @@ -75,10 +75,7 @@ class CircuitBreaker(val config: CircuitBreakerConfig)(using Ox): ): Option[F[T]] = val acquiredResult = tryAcquire if acquiredResult.acquired then - val before = System.nanoTime() - val result = operation - val after = System.nanoTime() - val duration = (after - before).nanos + val (duration, result) = timed(operation) if em.isError(result) then actorRef.tell(_.registerResult(CircuitBreakerResult.Failure, acquiredResult, actorRef)) Some(result) diff --git a/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala b/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala index e6e20de9..cfd41103 100644 --- a/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala +++ b/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala @@ -2,35 +2,29 @@ package ox.resilience import scala.concurrent.duration.* import ox.* -import ox.scheduling.scheduled -import ox.scheduling.{ScheduledConfig, Schedule} import java.util.concurrent.Semaphore import ox.channels.ActorRef +import ox.resilience.CircuitBreakerStateMachine.nextState -private sealed trait CircuitBreakerStateMachine(val config: CircuitBreakerStateMachineConfig)(using val ox: Ox): - def calculateMetrics(lastAcquisitionResult: Option[AcquireResult], timestamp: Long): Metrics - def updateResults(result: CircuitBreakerResult): Unit - def 
onStateChange(oldState: CircuitBreakerState, newState: CircuitBreakerState): Unit - +private[resilience] case class CircuitBreakerStateMachine( + config: CircuitBreakerStateMachineConfig, + results: CircuitBreakerStateMachine.CircuitBreakerResults +)(using val ox: Ox): @volatile private var _state: CircuitBreakerState = CircuitBreakerState.Closed def state: CircuitBreakerState = _state def registerResult(result: CircuitBreakerResult, acquired: AcquireResult, selfRef: ActorRef[CircuitBreakerStateMachine]): Unit = - updateResults(result) - val oldState = _state - val newState = nextState(calculateMetrics(Some(acquired), System.currentTimeMillis()), oldState) - _state = newState - scheduleCallback(oldState, newState, selfRef) - onStateChange(oldState, newState) + results.updateResults(result) + updateState(selfRef, Some(acquired)) end registerResult - def updateState(selfRef: ActorRef[CircuitBreakerStateMachine]): Unit = + def updateState(selfRef: ActorRef[CircuitBreakerStateMachine], acquiredResult: Option[AcquireResult] = None): Unit = val oldState = _state - val newState = nextState(calculateMetrics(None, System.currentTimeMillis()), oldState) + val newState = nextState(results.calculateMetrics(None, System.currentTimeMillis()), oldState, config) _state = newState scheduleCallback(oldState, newState, selfRef) - onStateChange(oldState, newState) + results.onStateChange(oldState, newState) private def scheduleCallback( oldState: CircuitBreakerState, @@ -46,46 +40,11 @@ private sealed trait CircuitBreakerStateMachine(val config: CircuitBreakerStateM if config.halfOpenTimeoutDuration.toMillis != 0 then updateAfter(config.halfOpenTimeoutDuration, selfRef) case _ => () - private def updateAfter(after: FiniteDuration, actorRef: ActorRef[CircuitBreakerStateMachine])(using Ox): Unit = + private def updateAfter(after: FiniteDuration, actorRef: ActorRef[CircuitBreakerStateMachine]): Unit = forkDiscard: - 
scheduled(ScheduledConfig(Schedule.InitialDelay(after)))(actorRef.tell(_.updateState(actorRef))) + sleep(after) + actorRef.tell(_.updateState(actorRef)) - private[resilience] def nextState(metrics: Metrics, currentState: CircuitBreakerState): CircuitBreakerState = - val currentTimestamp = metrics.timestamp - val lastAcquireResult = metrics.lastAcquisitionResult.filter(_.acquired) - val exceededThreshold = (metrics.failureRate >= config.failureRateThreshold || metrics.slowCallsRate >= config.slowCallThreshold) - val minCallsRecorder = metrics.operationsInWindow >= config.minimumNumberOfCalls - currentState match - case CircuitBreakerState.Closed => - if minCallsRecorder && exceededThreshold then - if config.waitDurationOpenState.toMillis == 0 then - CircuitBreakerState.HalfOpen(currentTimestamp, Semaphore(config.numberOfCallsInHalfOpenState)) - else CircuitBreakerState.Open(currentTimestamp) - else CircuitBreakerState.Closed - case CircuitBreakerState.Open(since) => - val timePassed = (currentTimestamp - since) > config.waitDurationOpenState.toMillis - if timePassed || config.waitDurationOpenState.toMillis == 0 then - CircuitBreakerState.HalfOpen(currentTimestamp, Semaphore(config.numberOfCallsInHalfOpenState)) - else CircuitBreakerState.Open(since) - case CircuitBreakerState.HalfOpen(since, semaphore, completedCalls) => - lazy val allCallsInHalfOpenCompleted = completedCalls >= config.numberOfCallsInHalfOpenState - lazy val timePassed = (currentTimestamp - since) > config.halfOpenTimeoutDuration.toMillis - // if we didn't complete all half open calls but timeout is reached go back to open - if !allCallsInHalfOpenCompleted && config.halfOpenTimeoutDuration.toMillis != 0 && timePassed then - CircuitBreakerState.Open(currentTimestamp) - // If halfOpen calls were completed && rates are below we close breaker - else if allCallsInHalfOpenCompleted && !exceededThreshold then CircuitBreakerState.Closed - // If halfOpen calls completed, but rates are still above go back to 
open - else if allCallsInHalfOpenCompleted && exceededThreshold then CircuitBreakerState.Open(currentTimestamp) - // We didn't complete all half open calls, keep halfOpen - else - lastAcquireResult match - case Some(AcquireResult(true, CircuitBreakerState.HalfOpen(s, sem, completed))) => - CircuitBreakerState.HalfOpen(s, sem, completed + 1) - case _ => CircuitBreakerState.HalfOpen(since, semaphore, completedCalls) - end if - end match - end nextState end CircuitBreakerStateMachine private[resilience] object CircuitBreakerStateMachine: @@ -94,23 +53,23 @@ private[resilience] object CircuitBreakerStateMachine: ): CircuitBreakerStateMachine = config.slidingWindow match case SlidingWindow.CountBased(size) => - CircuitBreakerCountStateMachine( + CircuitBreakerStateMachine( CircuitBreakerStateMachineConfig.fromConfig(config), - size + CountBased(size) ) case SlidingWindow.TimeBased(duration) => - CircuitBreakerTimeStateMachine( + CircuitBreakerStateMachine( CircuitBreakerStateMachineConfig.fromConfig(config), - duration + TimeWindowBased(duration) ) end apply - private[resilience] case class CircuitBreakerCountStateMachine( - stateMachineConfig: CircuitBreakerStateMachineConfig, - windowSize: Int - )(using ox: Ox) - extends CircuitBreakerStateMachine(stateMachineConfig)(using ox): + sealed trait CircuitBreakerResults(using val ox: Ox): + def onStateChange(oldState: CircuitBreakerState, newState: CircuitBreakerState): Unit + def updateResults(result: CircuitBreakerResult): Unit + def calculateMetrics(lastAcquisitionResult: Option[AcquireResult], timestamp: Long): Metrics + case class CountBased(windowSize: Int)(using ox: Ox) extends CircuitBreakerResults(using ox): private val callResults: Array[Option[CircuitBreakerResult]] = Array.fill[Option[CircuitBreakerResult]](windowSize)(None) private var writeIndex = 0 @@ -148,14 +107,9 @@ private[resilience] object CircuitBreakerStateMachine: timestamp ) end calculateMetrics - end CircuitBreakerCountStateMachine - - 
private[resilience] case class CircuitBreakerTimeStateMachine( - stateMachineConfig: CircuitBreakerStateMachineConfig, - windowDuration: FiniteDuration - )(using ox: Ox) - extends CircuitBreakerStateMachine(stateMachineConfig)(using ox): + end CountBased + case class TimeWindowBased(windowDuration: FiniteDuration)(using ox: Ox) extends CircuitBreakerResults(using ox): // holds timestamp of recored operation and result private val queue = collection.mutable.Queue[(Long, CircuitBreakerResult)]() @@ -173,20 +127,60 @@ private[resilience] object CircuitBreakerStateMachine: timestamp ) end calculateMetrics + def updateResults(result: CircuitBreakerResult): Unit = queue.addOne((System.currentTimeMillis(), result)) + def onStateChange(oldState: CircuitBreakerState, newState: CircuitBreakerState): Unit = import CircuitBreakerState.* // we have to match so we don't reset result when for example incrementing completed calls in halfopen state (oldState, newState) match case (Closed, Open(_) | HalfOpen(_, _, _)) => - queue.clearAndShrink(config.minimumNumberOfCalls) + queue.clear() case (HalfOpen(_, _, _), Open(_) | Closed) => - queue.clearAndShrink(config.minimumNumberOfCalls) + queue.clear() case (Open(_), Closed | HalfOpen(_, _, _)) => - queue.clearAndShrink(config.minimumNumberOfCalls) + queue.clear() case (_, _) => () end match end onStateChange - end CircuitBreakerTimeStateMachine + end TimeWindowBased + + def nextState(metrics: Metrics, currentState: CircuitBreakerState, config: CircuitBreakerStateMachineConfig): CircuitBreakerState = + val currentTimestamp = metrics.timestamp + val lastAcquireResult = metrics.lastAcquisitionResult.filter(_.acquired) + val exceededThreshold = (metrics.failureRate >= config.failureRateThreshold || metrics.slowCallsRate >= config.slowCallThreshold) + val minCallsRecorder = metrics.operationsInWindow >= config.minimumNumberOfCalls + currentState match + case CircuitBreakerState.Closed => + if minCallsRecorder && exceededThreshold then + if 
config.waitDurationOpenState.toMillis == 0 then + CircuitBreakerState.HalfOpen(currentTimestamp, Semaphore(config.numberOfCallsInHalfOpenState)) + else CircuitBreakerState.Open(currentTimestamp) + else CircuitBreakerState.Closed + case CircuitBreakerState.Open(since) => + val timePassed = (currentTimestamp - since) > config.waitDurationOpenState.toMillis + if timePassed || config.waitDurationOpenState.toMillis == 0 then + CircuitBreakerState.HalfOpen(currentTimestamp, Semaphore(config.numberOfCallsInHalfOpenState)) + else CircuitBreakerState.Open(since) + case CircuitBreakerState.HalfOpen(since, semaphore, completedCalls) => + lazy val allCallsInHalfOpenCompleted = completedCalls >= config.numberOfCallsInHalfOpenState + lazy val timePassed = (currentTimestamp - since) > config.halfOpenTimeoutDuration.toMillis + // if we didn't complete all half open calls but timeout is reached go back to open + if !allCallsInHalfOpenCompleted && config.halfOpenTimeoutDuration.toMillis != 0 && timePassed then + CircuitBreakerState.Open(currentTimestamp) + // If halfOpen calls were completed && rates are below we close breaker + else if allCallsInHalfOpenCompleted && !exceededThreshold then CircuitBreakerState.Closed + // If halfOpen calls completed, but rates are still above go back to open + else if allCallsInHalfOpenCompleted && exceededThreshold then CircuitBreakerState.Open(currentTimestamp) + // We didn't complete all half open calls, keep halfOpen + else + lastAcquireResult match + case Some(AcquireResult(true, CircuitBreakerState.HalfOpen(s, sem, completed))) => + CircuitBreakerState.HalfOpen(s, sem, completed + 1) + case _ => CircuitBreakerState.HalfOpen(since, semaphore, completedCalls) + end if + end match + end nextState + end CircuitBreakerStateMachine diff --git a/core/src/main/scala/ox/util.scala b/core/src/main/scala/ox/util.scala index 47a54d74..eb4df4ca 100644 --- a/core/src/main/scala/ox/util.scala +++ b/core/src/main/scala/ox/util.scala @@ -1,7 +1,7 @@ package ox 
import scala.concurrent.{Await, Future} -import scala.concurrent.duration.Duration +import scala.concurrent.duration.* import scala.util.control.NonFatal extension [T](inline t: T) @@ -105,3 +105,11 @@ inline def uninterruptible[T](inline f: T): T = /** Sleep (block the current thread/fork) for the provided amount of time. */ inline def sleep(inline howLong: Duration): Unit = Thread.sleep(howLong.toMillis) + +/** Provide duration and result for operation. */ +inline def timed[T](operation: => T): (FiniteDuration, T) = + val before = System.nanoTime() + val result = operation + val after = System.nanoTime() + val duration = (after - before).nanos + (duration, result) diff --git a/core/src/test/scala/ox/resilience/CircuitBreakerStateMachineTest.scala b/core/src/test/scala/ox/resilience/CircuitBreakerStateMachineTest.scala index ccfc1f8a..0be575b5 100644 --- a/core/src/test/scala/ox/resilience/CircuitBreakerStateMachineTest.scala +++ b/core/src/test/scala/ox/resilience/CircuitBreakerStateMachineTest.scala @@ -20,7 +20,7 @@ class CircuitBreakerStateMachineTest extends AnyFlatSpec with Matchers: Metrics(hundredPercentSuccessRate, hundredPercentSuccessRate, config.minimumNumberOfCalls, lastResult, System.currentTimeMillis()) // when - val resultingState = stateMachine.nextState(metrics, CircuitBreakerState.Closed) + val resultingState = CircuitBreakerStateMachine.nextState(metrics, CircuitBreakerState.Closed, stateMachine.config) resultingState shouldBe CircuitBreakerState.Closed } @@ -33,7 +33,7 @@ class CircuitBreakerStateMachineTest extends AnyFlatSpec with Matchers: val metrics = Metrics(badFailureRate, hundredPercentSuccessRate, config.minimumNumberOfCalls, lastResult, System.currentTimeMillis()) // when - val resultingState = stateMachine.nextState(metrics, CircuitBreakerState.Closed) + val resultingState = CircuitBreakerStateMachine.nextState(metrics, CircuitBreakerState.Closed, stateMachine.config) // then resultingState shouldBe a[CircuitBreakerState.Open] @@ 
-47,7 +47,7 @@ class CircuitBreakerStateMachineTest extends AnyFlatSpec with Matchers: val metrics = Metrics(badFailureRate, hundredPercentSuccessRate, config.minimumNumberOfCalls, lastResult, System.currentTimeMillis()) // when - val resultingState = stateMachine.nextState(metrics, CircuitBreakerState.Closed) + val resultingState = CircuitBreakerStateMachine.nextState(metrics, CircuitBreakerState.Closed, stateMachine.config) // then resultingState shouldBe a[CircuitBreakerState.HalfOpen] @@ -69,7 +69,7 @@ class CircuitBreakerStateMachineTest extends AnyFlatSpec with Matchers: // when val resultingState = - stateMachine.nextState(metrics, CircuitBreakerState.HalfOpen(timestamp, Semaphore(10), 0)) + CircuitBreakerStateMachine.nextState(metrics, CircuitBreakerState.HalfOpen(timestamp, Semaphore(10), 0), stateMachine.config) // then resultingState shouldBe a[CircuitBreakerState.Open] @@ -86,7 +86,7 @@ class CircuitBreakerStateMachineTest extends AnyFlatSpec with Matchers: val metrics = Metrics(badFailureRate, hundredPercentSuccessRate, config.minimumNumberOfCalls, lastResult, timestamp) // when - val resultingState = stateMachine.nextState(metrics, state) + val resultingState = CircuitBreakerStateMachine.nextState(metrics, state, stateMachine.config) // then resultingState shouldBe a[CircuitBreakerState.HalfOpen] @@ -104,7 +104,7 @@ class CircuitBreakerStateMachineTest extends AnyFlatSpec with Matchers: val metrics = Metrics(hundredPercentSuccessRate, hundredPercentSuccessRate, config.numberOfCallsInHalfOpenState, lastResult, timestamp) // when - val resultingState = stateMachine.nextState(metrics, state) + val resultingState = CircuitBreakerStateMachine.nextState(metrics, state, stateMachine.config) // then resultingState shouldBe CircuitBreakerState.Closed @@ -121,7 +121,7 @@ class CircuitBreakerStateMachineTest extends AnyFlatSpec with Matchers: val metrics = Metrics(badFailureRate, hundredPercentSuccessRate, config.numberOfCallsInHalfOpenState, lastResult, 
timestamp) // when - val resultingState = stateMachine.nextState(metrics, state) + val resultingState = CircuitBreakerStateMachine.nextState(metrics, state, stateMachine.config) // then resultingState shouldBe a[CircuitBreakerState.Open] @@ -144,7 +144,7 @@ class CircuitBreakerStateMachineTest extends AnyFlatSpec with Matchers: ) // when - val resultingState = stateMachine.nextState(metrics, state) + val resultingState = CircuitBreakerStateMachine.nextState(metrics, state, stateMachine.config) // then resultingState shouldBe a[CircuitBreakerState.HalfOpen] From c617c66a44ab3b582af328d4eca6457f7b944a8f Mon Sep 17 00:00:00 2001 From: Kamil-Lontkowski <kamillont14@gmail.com> Date: Mon, 20 Jan 2025 13:21:02 +0100 Subject: [PATCH 11/26] don't go through all results on every call --- .../CircuitBreakerStateMachine.scala | 40 +++++++++++-------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala b/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala index cfd41103..11192ba4 100644 --- a/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala +++ b/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala @@ -70,35 +70,43 @@ private[resilience] object CircuitBreakerStateMachine: def calculateMetrics(lastAcquisitionResult: Option[AcquireResult], timestamp: Long): Metrics case class CountBased(windowSize: Int)(using ox: Ox) extends CircuitBreakerResults(using ox): - private val callResults: Array[Option[CircuitBreakerResult]] = Array.fill[Option[CircuitBreakerResult]](windowSize)(None) - private var writeIndex = 0 + private val results = new collection.mutable.ArrayDeque[CircuitBreakerResult](windowSize) + private var slowCalls = 0 + private var failedCalls = 0 + private var successCalls = 0 def onStateChange(oldState: CircuitBreakerState, newState: CircuitBreakerState): Unit = import CircuitBreakerState.* // we have to match so we don't reset result when for 
example incrementing completed calls in halfopen state (oldState, newState) match case (Closed, Open(_) | HalfOpen(_, _, _)) => - callResults.mapInPlace(_ => None).discard - writeIndex = 0 + results.clear() case (HalfOpen(_, _, _), Open(_) | Closed) => - callResults.mapInPlace(_ => None).discard - writeIndex = 0 + results.clear() case (Open(_), Closed | HalfOpen(_, _, _)) => - callResults.mapInPlace(_ => None).discard - writeIndex = 0 + results.clear() case (_, _) => () end match end onStateChange def updateResults(result: CircuitBreakerResult): Unit = - callResults(writeIndex) = Some(result) - writeIndex = (writeIndex + 1) % windowSize + result match + case CircuitBreakerResult.Success => successCalls += 1 + case CircuitBreakerResult.Failure => failedCalls += 1 + case CircuitBreakerResult.Slow => slowCalls += 1 + val resultingQueue = results.addOne(result) + if resultingQueue.length > windowSize then + resultingQueue.removeHeadOption(false) match + case Some(CircuitBreakerResult.Success) => successCalls -= 1 + case Some(CircuitBreakerResult.Failure) => failedCalls -= 1 + case Some(CircuitBreakerResult.Slow) => slowCalls -= 1 + case None => () + end updateResults def calculateMetrics(lastAcquisitionResult: Option[AcquireResult], timestamp: Long): Metrics = - val results = callResults.flatMap(identity) val numOfOperations = results.length - val failuresRate = ((results.count(_ == CircuitBreakerResult.Failure) / windowSize.toFloat) * 100).toInt - val slowRate = ((results.count(_ == CircuitBreakerResult.Slow) / windowSize.toFloat) * 100).toInt + val failuresRate = ((failedCalls / numOfOperations.toFloat) * 100).toInt + val slowRate = ((slowCalls / numOfOperations.toFloat) * 100).toInt Metrics( failuresRate, slowRate, @@ -115,10 +123,10 @@ private[resilience] object CircuitBreakerStateMachine: def calculateMetrics(lastAcquisitionResult: Option[AcquireResult], timestamp: Long): Metrics = // filter all entries that happend outside sliding window - val results = 
queue.filter((time, _) => timestamp > time + windowDuration.toMillis) + val results = queue.filterInPlace((time, _) => timestamp > time + windowDuration.toMillis) val numOfOperations = results.length - val failuresRate = ((results.count(_ == CircuitBreakerResult.Failure) / results.length.toFloat) * 100).toInt - val slowRate = ((results.count(_ == CircuitBreakerResult.Slow) / results.length.toFloat) * 100).toInt + val failuresRate = ((results.count(_ == CircuitBreakerResult.Failure) / numOfOperations.toFloat) * 100).toInt + val slowRate = ((results.count(_ == CircuitBreakerResult.Slow) / numOfOperations.toFloat) * 100).toInt Metrics( failuresRate, slowRate, From 64d694f0760c363ed10765c135c22ddcba4d175c Mon Sep 17 00:00:00 2001 From: Kamil-Lontkowski <kamillont14@gmail.com> Date: Mon, 20 Jan 2025 18:45:31 +0100 Subject: [PATCH 12/26] CircuiBreaker docs calculate rolling metrics, more tests --- .../scala/ox/resilience/CircuitBreaker.scala | 4 +- .../ox/resilience/CircuitBreakerConfig.scala | 29 ++-- .../CircuitBreakerStateMachine.scala | 94 ++++++------- .../ox/resilience/CircuitBreakerTest.scala | 130 +++++++++++++++++- doc/utils/circuit-breaker.md | 80 +++++++++++ 5 files changed, 276 insertions(+), 61 deletions(-) create mode 100644 doc/utils/circuit-breaker.md diff --git a/core/src/main/scala/ox/resilience/CircuitBreaker.scala b/core/src/main/scala/ox/resilience/CircuitBreaker.scala index 8e1b85df..167e32f0 100644 --- a/core/src/main/scala/ox/resilience/CircuitBreaker.scala +++ b/core/src/main/scala/ox/resilience/CircuitBreaker.scala @@ -53,8 +53,8 @@ end CircuitBreakerStateMachineConfig /** Circuit Breaker. Operations can be dropped, when the breaker is open or if it doesn't take more operation in halfOpen state. The Circuit * Breaker might calculate different metrics based on [[SlidingWindow]] provided in config. See [[SlidingWindow]] for more details. 
*/ -class CircuitBreaker(val config: CircuitBreakerConfig)(using Ox): - private val stateMachine = CircuitBreakerStateMachine(config) +case class CircuitBreaker(config: CircuitBreakerConfig)(using Ox): + private[resilience] val stateMachine = CircuitBreakerStateMachine(config) private val actorRef: ActorRef[CircuitBreakerStateMachine] = Actor.create(stateMachine)(using sc = BufferCapacity.apply(100)) private def tryAcquire: AcquireResult = stateMachine.state match diff --git a/core/src/main/scala/ox/resilience/CircuitBreakerConfig.scala b/core/src/main/scala/ox/resilience/CircuitBreakerConfig.scala index 228afa5f..a73bbd5d 100644 --- a/core/src/main/scala/ox/resilience/CircuitBreakerConfig.scala +++ b/core/src/main/scala/ox/resilience/CircuitBreakerConfig.scala @@ -38,14 +38,14 @@ end SlidingWindow * also maximum number of operations that can be started in half open state. */ case class CircuitBreakerConfig( - failureRateThreshold: Int = 50, - slowCallThreshold: Int = 0, - slowCallDurationThreshold: FiniteDuration = 60.seconds, - slidingWindow: SlidingWindow = SlidingWindow.CountBased(100), - minimumNumberOfCalls: Int = 20, - waitDurationOpenState: FiniteDuration = FiniteDuration(10, TimeUnit.SECONDS), - halfOpenTimeoutDuration: FiniteDuration = FiniteDuration(0, TimeUnit.MILLISECONDS), - numberOfCallsInHalfOpenState: Int = 10 + failureRateThreshold: Int, + slowCallThreshold: Int, + slowCallDurationThreshold: FiniteDuration, + slidingWindow: SlidingWindow, + minimumNumberOfCalls: Int, + waitDurationOpenState: FiniteDuration, + halfOpenTimeoutDuration: FiniteDuration, + numberOfCallsInHalfOpenState: Int ): assert( failureRateThreshold >= 0 && failureRateThreshold <= 100, @@ -60,3 +60,16 @@ case class CircuitBreakerConfig( s"numberOfCallsInHalfOpenState must be greater than 0, value: $numberOfCallsInHalfOpenState" ) end CircuitBreakerConfig + +object CircuitBreakerConfig: + def default: CircuitBreakerConfig = CircuitBreakerConfig( + failureRateThreshold = 50, + 
slowCallThreshold = 50, + slowCallDurationThreshold = 60.seconds, + slidingWindow = SlidingWindow.CountBased(100), + minimumNumberOfCalls = 20, + waitDurationOpenState = FiniteDuration(10, TimeUnit.SECONDS), + halfOpenTimeoutDuration = FiniteDuration(0, TimeUnit.MILLISECONDS), + numberOfCallsInHalfOpenState = 10 + ) +end CircuitBreakerConfig diff --git a/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala b/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala index 11192ba4..21590389 100644 --- a/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala +++ b/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala @@ -8,7 +8,7 @@ import ox.resilience.CircuitBreakerStateMachine.nextState private[resilience] case class CircuitBreakerStateMachine( config: CircuitBreakerStateMachineConfig, - results: CircuitBreakerStateMachine.CircuitBreakerResults + results: CircuitBreakerResults )(using val ox: Ox): @volatile private var _state: CircuitBreakerState = CircuitBreakerState.Closed @@ -55,20 +55,60 @@ private[resilience] object CircuitBreakerStateMachine: case SlidingWindow.CountBased(size) => CircuitBreakerStateMachine( CircuitBreakerStateMachineConfig.fromConfig(config), - CountBased(size) + CircuitBreakerResults.CountBased(size) ) case SlidingWindow.TimeBased(duration) => CircuitBreakerStateMachine( CircuitBreakerStateMachineConfig.fromConfig(config), - TimeWindowBased(duration) + CircuitBreakerResults.TimeWindowBased(duration) ) end apply - sealed trait CircuitBreakerResults(using val ox: Ox): - def onStateChange(oldState: CircuitBreakerState, newState: CircuitBreakerState): Unit - def updateResults(result: CircuitBreakerResult): Unit - def calculateMetrics(lastAcquisitionResult: Option[AcquireResult], timestamp: Long): Metrics + def nextState(metrics: Metrics, currentState: CircuitBreakerState, config: CircuitBreakerStateMachineConfig): CircuitBreakerState = + val currentTimestamp = metrics.timestamp + val 
lastAcquireResult = metrics.lastAcquisitionResult.filter(_.acquired) + val exceededThreshold = (metrics.failureRate >= config.failureRateThreshold || metrics.slowCallsRate >= config.slowCallThreshold) + val minCallsRecorder = metrics.operationsInWindow >= config.minimumNumberOfCalls + currentState match + case CircuitBreakerState.Closed => + if minCallsRecorder && exceededThreshold then + if config.waitDurationOpenState.toMillis == 0 then + CircuitBreakerState.HalfOpen(currentTimestamp, Semaphore(config.numberOfCallsInHalfOpenState)) + else CircuitBreakerState.Open(currentTimestamp) + else CircuitBreakerState.Closed + case CircuitBreakerState.Open(since) => + val timePassed = (currentTimestamp - since) > config.waitDurationOpenState.toMillis + if timePassed || config.waitDurationOpenState.toMillis == 0 then + CircuitBreakerState.HalfOpen(currentTimestamp, Semaphore(config.numberOfCallsInHalfOpenState)) + else CircuitBreakerState.Open(since) + case CircuitBreakerState.HalfOpen(since, semaphore, completedCalls) => + lazy val allCallsInHalfOpenCompleted = completedCalls >= config.numberOfCallsInHalfOpenState + lazy val timePassed = (currentTimestamp - since) > config.halfOpenTimeoutDuration.toMillis + // if we didn't complete all half open calls but timeout is reached go back to open + if !allCallsInHalfOpenCompleted && config.halfOpenTimeoutDuration.toMillis != 0 && timePassed then + CircuitBreakerState.Open(currentTimestamp) + // If halfOpen calls were completed && rates are below we close breaker + else if allCallsInHalfOpenCompleted && !exceededThreshold then CircuitBreakerState.Closed + // If halfOpen calls completed, but rates are still above go back to open + else if allCallsInHalfOpenCompleted && exceededThreshold then CircuitBreakerState.Open(currentTimestamp) + // We didn't complete all half open calls, keep halfOpen + else + lastAcquireResult match + case Some(AcquireResult(true, CircuitBreakerState.HalfOpen(s, sem, completed))) => + 
CircuitBreakerState.HalfOpen(s, sem, completed + 1) + case _ => CircuitBreakerState.HalfOpen(since, semaphore, completedCalls) + end if + end match + end nextState + +end CircuitBreakerStateMachine + +private[resilience] sealed trait CircuitBreakerResults(using val ox: Ox): + def onStateChange(oldState: CircuitBreakerState, newState: CircuitBreakerState): Unit + def updateResults(result: CircuitBreakerResult): Unit + def calculateMetrics(lastAcquisitionResult: Option[AcquireResult], timestamp: Long): Metrics +private[resilience] object CircuitBreakerResults: case class CountBased(windowSize: Int)(using ox: Ox) extends CircuitBreakerResults(using ox): private val results = new collection.mutable.ArrayDeque[CircuitBreakerResult](windowSize) private var slowCalls = 0 @@ -153,42 +193,4 @@ private[resilience] object CircuitBreakerStateMachine: end match end onStateChange end TimeWindowBased - - def nextState(metrics: Metrics, currentState: CircuitBreakerState, config: CircuitBreakerStateMachineConfig): CircuitBreakerState = - val currentTimestamp = metrics.timestamp - val lastAcquireResult = metrics.lastAcquisitionResult.filter(_.acquired) - val exceededThreshold = (metrics.failureRate >= config.failureRateThreshold || metrics.slowCallsRate >= config.slowCallThreshold) - val minCallsRecorder = metrics.operationsInWindow >= config.minimumNumberOfCalls - currentState match - case CircuitBreakerState.Closed => - if minCallsRecorder && exceededThreshold then - if config.waitDurationOpenState.toMillis == 0 then - CircuitBreakerState.HalfOpen(currentTimestamp, Semaphore(config.numberOfCallsInHalfOpenState)) - else CircuitBreakerState.Open(currentTimestamp) - else CircuitBreakerState.Closed - case CircuitBreakerState.Open(since) => - val timePassed = (currentTimestamp - since) > config.waitDurationOpenState.toMillis - if timePassed || config.waitDurationOpenState.toMillis == 0 then - CircuitBreakerState.HalfOpen(currentTimestamp, Semaphore(config.numberOfCallsInHalfOpenState)) 
- else CircuitBreakerState.Open(since) - case CircuitBreakerState.HalfOpen(since, semaphore, completedCalls) => - lazy val allCallsInHalfOpenCompleted = completedCalls >= config.numberOfCallsInHalfOpenState - lazy val timePassed = (currentTimestamp - since) > config.halfOpenTimeoutDuration.toMillis - // if we didn't complete all half open calls but timeout is reached go back to open - if !allCallsInHalfOpenCompleted && config.halfOpenTimeoutDuration.toMillis != 0 && timePassed then - CircuitBreakerState.Open(currentTimestamp) - // If halfOpen calls were completed && rates are below we close breaker - else if allCallsInHalfOpenCompleted && !exceededThreshold then CircuitBreakerState.Closed - // If halfOpen calls completed, but rates are still above go back to open - else if allCallsInHalfOpenCompleted && exceededThreshold then CircuitBreakerState.Open(currentTimestamp) - // We didn't complete all half open calls, keep halfOpen - else - lastAcquireResult match - case Some(AcquireResult(true, CircuitBreakerState.HalfOpen(s, sem, completed))) => - CircuitBreakerState.HalfOpen(s, sem, completed + 1) - case _ => CircuitBreakerState.HalfOpen(since, semaphore, completedCalls) - end if - end match - end nextState - -end CircuitBreakerStateMachine +end CircuitBreakerResults diff --git a/core/src/test/scala/ox/resilience/CircuitBreakerTest.scala b/core/src/test/scala/ox/resilience/CircuitBreakerTest.scala index 405739e3..854705af 100644 --- a/core/src/test/scala/ox/resilience/CircuitBreakerTest.scala +++ b/core/src/test/scala/ox/resilience/CircuitBreakerTest.scala @@ -2,27 +2,86 @@ package ox.resilience import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers +import org.scalatest.OptionValues import scala.concurrent.duration.* import ox.* +import org.scalatest.EitherValues -class CircuitBreakerTest extends AnyFlatSpec with Matchers: - behavior of "Circuit Breaker" +class CircuitBreakerTest extends AnyFlatSpec with Matchers with OptionValues 
with EitherValues: + behavior of "Circuit Breaker run operations" - it should "drop operation after exceeding threshold" in supervised { + it should "run operation when metrics are not exceeded" in supervised { // given val thresholdRate = 100 - val numberOfOperations = 1 + val numberOfOperations = 2 val circuitBreaker = CircuitBreaker( - CircuitBreakerConfig( + CircuitBreakerConfig.default.copy( failureRateThreshold = thresholdRate, minimumNumberOfCalls = numberOfOperations, slidingWindow = SlidingWindow.CountBased(numberOfOperations), numberOfCallsInHalfOpenState = 1 ) ) + var counter = 0 + def f(): Either[String, String] = + counter += 1 + if counter <= 1 then Left("boom") + else Right("success") + + // when + val result1 = circuitBreaker.runOrDropEither(f()) + sleep(100.millis) // wait for state to register + val result2 = circuitBreaker.runOrDropEither(f()) + + // then + result1 shouldBe defined + result1.value.left.value shouldBe "boom" + result2 shouldBe defined + result2.value.value shouldBe "success" + } + it should "drop operation after exceeding fauilure threshold" in supervised { + // given + val thresholdRate = 100 + val numberOfOperations = 1 + val circuitBreaker = CircuitBreaker( + CircuitBreakerConfig.default.copy( + failureRateThreshold = thresholdRate, + minimumNumberOfCalls = numberOfOperations, + slidingWindow = SlidingWindow.CountBased(numberOfOperations), + numberOfCallsInHalfOpenState = 1 + ) + ) def f(): Either[String, String] = Left("boom") + + // when + val result1 = circuitBreaker.runOrDropEither(f()) + sleep(100.millis) // wait for state to register + val result2 = circuitBreaker.runOrDropEither(f()) + + // then + result1 shouldBe defined + result2 shouldBe empty + } + + it should "drop operation after exceeding slow call threshold" in supervised { + // given + val thresholdRate = 100 + val numberOfOperations = 1 + val circuitBreaker = CircuitBreaker( + CircuitBreakerConfig.default.copy( + slowCallThreshold = thresholdRate, + 
minimumNumberOfCalls = numberOfOperations, + slowCallDurationThreshold = 100.millis, + slidingWindow = SlidingWindow.CountBased(numberOfOperations), + numberOfCallsInHalfOpenState = 1 + ) + ) + def f(): Either[String, String] = + sleep(500.millis) + Right("success") + // when val result1 = circuitBreaker.runOrDropEither(f()) sleep(100.millis) // wait for state to register @@ -33,4 +92,65 @@ class CircuitBreakerTest extends AnyFlatSpec with Matchers: result2 shouldBe empty } + behavior of "Circuit Breaker scheduled state changes" + + it should "switch to halfopen after configured time" in supervised { + // given + val thresholdRate = 100 + val numberOfOperations = 1 + val circuitBreaker = CircuitBreaker( + CircuitBreakerConfig.default.copy( + failureRateThreshold = thresholdRate, + minimumNumberOfCalls = numberOfOperations, + slidingWindow = SlidingWindow.CountBased(numberOfOperations), + numberOfCallsInHalfOpenState = 1, + waitDurationOpenState = 1.second + ) + ) + def f(): Either[String, String] = + Left("boom") + + // when + val result1 = circuitBreaker.runOrDropEither(f()) + sleep(100.millis) // wait for state to register + val state = circuitBreaker.stateMachine.state + sleep(1500.millis) + val stateAfterWait = circuitBreaker.stateMachine.state + + // then + result1 shouldBe defined + state shouldBe a[CircuitBreakerState.Open] + stateAfterWait shouldBe a[CircuitBreakerState.HalfOpen] + } + + it should "switch back to open after configured timeout in half open state" in supervised { + // given + val thresholdRate = 100 + val numberOfOperations = 1 + val circuitBreaker = CircuitBreaker( + CircuitBreakerConfig.default.copy( + failureRateThreshold = thresholdRate, + minimumNumberOfCalls = numberOfOperations, + slidingWindow = SlidingWindow.CountBased(numberOfOperations), + numberOfCallsInHalfOpenState = 1, + waitDurationOpenState = 10.millis, + halfOpenTimeoutDuration = 1.second + ) + ) + def f(): Either[String, String] = + Left("boom") + + // when + val result1 = 
circuitBreaker.runOrDropEither(f()) // trigger swithc to open + sleep(100.millis) // wait for state to register, and for switch to half open + val state = circuitBreaker.stateMachine.state + sleep(1500.millis) // wait longer than half open timeout + val stateAfterWait = circuitBreaker.stateMachine.state + + // then + result1 shouldBe defined + state shouldBe a[CircuitBreakerState.HalfOpen] + stateAfterWait shouldBe a[CircuitBreakerState.Open] + } + end CircuitBreakerTest diff --git a/doc/utils/circuit-breaker.md b/doc/utils/circuit-breaker.md new file mode 100644 index 00000000..67db0ba7 --- /dev/null +++ b/doc/utils/circuit-breaker.md @@ -0,0 +1,80 @@ +# Circuit Breaker + +The circuit breaker allows controlling execution of operations and stops if certain condition are met. CircuitBreaker is thread-safe and uses [actor](./actors.md) underneath to change breaker state. + +```{note} +Since actor executes on one thread which may be bottleneck. That means that calculating state change can be deleyad and breaker can let few more operations to complete before openning. +This can be the case with many very fast operations. +``` + +## API + +```scala mdoc:compile-only +import ox.supervised +import ox.resilience.* + +supervised: + val circuitBreaker = CircuitBreaker(CircuitBreakerConfig.default) + + type T + def operation: T = ??? + + val operationResult: Option[T] = circuitBreaker.runOrDrop(operation) +``` + +## Configuration + +### Sliding window + +There are two ways that metrics are calculated. + +- Count based sliding window - `SlidingWindow.CountBased`, counts metrics based on last n call results. +- Time based sliding window - `SlidingWindow.TimeBased`, counts metrics based on call results recorded in the lapse of duration before current time. + +### Failure rate and slow call rate thresholds + +The state of the CircuitBreaker changes from `Closed` to `Open` when the `failureRate` is greater or equal to configurable threshold. 
For example when 80% of recorded call results failed. +Failures are counted based on provided `ErrorMode`. + +The same state change also happen when percentage of slow calls (exceeding `slowCallDurationThreshold`) is equal or greater than configured threshold. For exmaple 80% of calls took longer then 10 seconds. + +Those metrics are considered only when number of recorder calls is greater or equal to `minimumNumberOfCalls`, otherwise we don't change state even if `failureRate` is 100%. + +### Parameters + +- `failureRateThreshold: Int = 50` - percentage of recorder calls marked as failed required to switch to open state +- `slowCallThreshold: Int = 50` - percentage of recorder calls marked as slow required to switch to open state +- `slowCallDurationThreshold: FiniteDuration = 60.seconds` - duration that call has to exceed to be marked as slow +- `slidingWindow: SlidingWindow = SlidingWindow.CountBased(100)` - mechanism to determine how many calls are recorded +- `minimumNumberOfCalls: Int = 20` - minium number of calls recored for breaker to be able to swtich to open state based on thresholds +- `waitDurationOpenState: FiniteDuration = FiniteDuration(10, TimeUnit.SECONDS)` - duration that CircuitBreaker will wait before switching from `Open` state to `HalfOpen` +- `halfOpenTimeoutDuration: FiniteDuration = FiniteDuration(0, TimeUnit.MILLISECONDS)` - timeout for `HalfOpen` state after which, if not enough calls were recorder, breaker will go back to `Open` state +- `numberOfCallsInHalfOpenState: Int = 10` - number of calls recorded in `HalfOpen` state needed to calculate metrics to decide if breaker should go back to `Open` state or `Closed` + +## Examples + +```scala mdoc:compile-only +import ox.UnionMode +import ox.supervised +import ox.resilience.* +import scala.concurrent.duration.* + +def directOperation: Int = ??? +def eitherOperation: Either[String, Int] = ??? +def unionOperation: String | Int = ??? 
+ +supervised: + val ciruictBreaker = CircuitBreaker(CircuitBreakerConfig.default) + + // various operation definitions + ciruictBreaker.runOrDrop(directOperation) + ciruictBreaker.runOrDropEither(eitherOperation) + + // custom error mode + ciruictBreaker.runOrDropWithErrorMode(UnionMode[String])(unionOperation) + + // retry with circuit breaker inside + retry(RetryConfig.backoff(3, 100.millis)){ + ciruictBreaker.runOrDrop(directOperation).get + } +``` From f42e74d5607198b5bb007350bf2e8b2cc65716d7 Mon Sep 17 00:00:00 2001 From: Kamil-Lontkowski <kamillont14@gmail.com> Date: Mon, 20 Jan 2025 19:25:05 +0100 Subject: [PATCH 13/26] added bigger time margin for test, track metrics per result --- .../CircuitBreakerStateMachine.scala | 54 ++++++++++++++----- .../ox/resilience/CircuitBreakerTest.scala | 8 +-- 2 files changed, 45 insertions(+), 17 deletions(-) diff --git a/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala b/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala index 21590389..2fd03286 100644 --- a/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala +++ b/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala @@ -5,10 +5,11 @@ import ox.* import java.util.concurrent.Semaphore import ox.channels.ActorRef import ox.resilience.CircuitBreakerStateMachine.nextState +import scala.compiletime.ops.double private[resilience] case class CircuitBreakerStateMachine( config: CircuitBreakerStateMachineConfig, - results: CircuitBreakerResults + private val results: CircuitBreakerResults )(using val ox: Ox): @volatile private var _state: CircuitBreakerState = CircuitBreakerState.Closed @@ -115,16 +116,22 @@ private[resilience] object CircuitBreakerResults: private var failedCalls = 0 private var successCalls = 0 + private def clearResults: Unit = + results.clear() + slowCalls = 0 + failedCalls = 0 + successCalls = 0 + def onStateChange(oldState: CircuitBreakerState, newState: CircuitBreakerState): Unit = import 
CircuitBreakerState.* // we have to match so we don't reset result when for example incrementing completed calls in halfopen state (oldState, newState) match case (Closed, Open(_) | HalfOpen(_, _, _)) => - results.clear() + clearResults case (HalfOpen(_, _, _), Open(_) | Closed) => - results.clear() + clearResults case (Open(_), Closed | HalfOpen(_, _, _)) => - results.clear() + clearResults case (_, _) => () end match end onStateChange @@ -159,14 +166,31 @@ private[resilience] object CircuitBreakerResults: case class TimeWindowBased(windowDuration: FiniteDuration)(using ox: Ox) extends CircuitBreakerResults(using ox): // holds timestamp of recored operation and result - private val queue = collection.mutable.Queue[(Long, CircuitBreakerResult)]() + private val results = collection.mutable.ArrayDeque[(Long, CircuitBreakerResult)]() + private var slowCalls = 0 + private var failedCalls = 0 + private var successCalls = 0 + + private def clearResults(): Unit = + results.clear() + slowCalls = 0 + failedCalls = 0 + successCalls = 0 def calculateMetrics(lastAcquisitionResult: Option[AcquireResult], timestamp: Long): Metrics = // filter all entries that happend outside sliding window - val results = queue.filterInPlace((time, _) => timestamp > time + windowDuration.toMillis) - val numOfOperations = results.length - val failuresRate = ((results.count(_ == CircuitBreakerResult.Failure) / numOfOperations.toFloat) * 100).toInt - val slowRate = ((results.count(_ == CircuitBreakerResult.Slow) / numOfOperations.toFloat) * 100).toInt + val res = results.filterInPlace { (time, result) => + val isOlder = timestamp > time + windowDuration.toMillis + if isOlder then + result match + case CircuitBreakerResult.Success => successCalls -= 1 + case CircuitBreakerResult.Failure => failedCalls -= 1 + case CircuitBreakerResult.Slow => slowCalls -= 1 + isOlder + } + val numOfOperations = res.length + val failuresRate = ((failedCalls / numOfOperations.toFloat) * 100).toInt + val slowRate = 
((slowCalls / numOfOperations.toFloat) * 100).toInt Metrics( failuresRate, slowRate, @@ -177,18 +201,22 @@ private[resilience] object CircuitBreakerResults: end calculateMetrics def updateResults(result: CircuitBreakerResult): Unit = - queue.addOne((System.currentTimeMillis(), result)) + result match + case CircuitBreakerResult.Success => successCalls += 1 + case CircuitBreakerResult.Failure => failedCalls += 1 + case CircuitBreakerResult.Slow => slowCalls += 1 + results.addOne((System.currentTimeMillis(), result)) def onStateChange(oldState: CircuitBreakerState, newState: CircuitBreakerState): Unit = import CircuitBreakerState.* // we have to match so we don't reset result when for example incrementing completed calls in halfopen state (oldState, newState) match case (Closed, Open(_) | HalfOpen(_, _, _)) => - queue.clear() + clearResults() case (HalfOpen(_, _, _), Open(_) | Closed) => - queue.clear() + clearResults() case (Open(_), Closed | HalfOpen(_, _, _)) => - queue.clear() + clearResults() case (_, _) => () end match end onStateChange diff --git a/core/src/test/scala/ox/resilience/CircuitBreakerTest.scala b/core/src/test/scala/ox/resilience/CircuitBreakerTest.scala index 854705af..ee12731f 100644 --- a/core/src/test/scala/ox/resilience/CircuitBreakerTest.scala +++ b/core/src/test/scala/ox/resilience/CircuitBreakerTest.scala @@ -133,8 +133,8 @@ class CircuitBreakerTest extends AnyFlatSpec with Matchers with OptionValues wit minimumNumberOfCalls = numberOfOperations, slidingWindow = SlidingWindow.CountBased(numberOfOperations), numberOfCallsInHalfOpenState = 1, - waitDurationOpenState = 10.millis, - halfOpenTimeoutDuration = 1.second + waitDurationOpenState = 1.seconds, + halfOpenTimeoutDuration = 2.seconds ) ) def f(): Either[String, String] = @@ -142,9 +142,9 @@ class CircuitBreakerTest extends AnyFlatSpec with Matchers with OptionValues wit // when val result1 = circuitBreaker.runOrDropEither(f()) // trigger swithc to open - sleep(100.millis) // wait for 
state to register, and for switch to half open + sleep(1500.millis) // wait for state to register, and for switch to half open val state = circuitBreaker.stateMachine.state - sleep(1500.millis) // wait longer than half open timeout + sleep(2500.millis) // wait longer than half open timeout val stateAfterWait = circuitBreaker.stateMachine.state // then From 6f376eb0551f04298ec5dd9966560797eb11ada8 Mon Sep 17 00:00:00 2001 From: Kamil-Lontkowski <kamillont14@gmail.com> Date: Mon, 20 Jan 2025 20:41:08 +0100 Subject: [PATCH 14/26] use removeHeadWhile instead of filterInPlace --- .../resilience/CircuitBreakerStateMachine.scala | 16 +++++++--------- .../scala/ox/resilience/CircuitBreakerTest.scala | 2 +- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala b/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala index 2fd03286..6b2b4793 100644 --- a/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala +++ b/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala @@ -179,16 +179,14 @@ private[resilience] object CircuitBreakerResults: def calculateMetrics(lastAcquisitionResult: Option[AcquireResult], timestamp: Long): Metrics = // filter all entries that happend outside sliding window - val res = results.filterInPlace { (time, result) => - val isOlder = timestamp > time + windowDuration.toMillis - if isOlder then - result match - case CircuitBreakerResult.Success => successCalls -= 1 - case CircuitBreakerResult.Failure => failedCalls -= 1 - case CircuitBreakerResult.Slow => slowCalls -= 1 - isOlder + val removed = results.removeHeadWhile((time, _) => timestamp > time + windowDuration.toMillis) + removed.foreach { (_, result) => + result match + case CircuitBreakerResult.Success => successCalls -= 1 + case CircuitBreakerResult.Failure => failedCalls -= 1 + case CircuitBreakerResult.Slow => slowCalls -= 1 } - val numOfOperations = res.length + val numOfOperations 
= results.length val failuresRate = ((failedCalls / numOfOperations.toFloat) * 100).toInt val slowRate = ((slowCalls / numOfOperations.toFloat) * 100).toInt Metrics( diff --git a/core/src/test/scala/ox/resilience/CircuitBreakerTest.scala b/core/src/test/scala/ox/resilience/CircuitBreakerTest.scala index ee12731f..e0955389 100644 --- a/core/src/test/scala/ox/resilience/CircuitBreakerTest.scala +++ b/core/src/test/scala/ox/resilience/CircuitBreakerTest.scala @@ -141,7 +141,7 @@ class CircuitBreakerTest extends AnyFlatSpec with Matchers with OptionValues wit Left("boom") // when - val result1 = circuitBreaker.runOrDropEither(f()) // trigger swithc to open + val result1 = circuitBreaker.runOrDropEither(f()) // trigger switch to open sleep(1500.millis) // wait for state to register, and for switch to half open val state = circuitBreaker.stateMachine.state sleep(2500.millis) // wait longer than half open timeout From 3581bd99c13f525c2d63b7f35d3879f22e8ad6ce Mon Sep 17 00:00:00 2001 From: Kamil-Lontkowski <kamillont14@gmail.com> Date: Mon, 20 Jan 2025 20:49:31 +0100 Subject: [PATCH 15/26] introduce PercentageThreshold type --- .../scala/ox/resilience/CircuitBreaker.scala | 4 +-- .../ox/resilience/CircuitBreakerConfig.scala | 27 ++++++++++--------- .../CircuitBreakerStateMachine.scala | 1 - .../CircuitBreakerStateMachineTest.scala | 4 +-- .../ox/resilience/CircuitBreakerTest.scala | 10 +++---- 5 files changed, 24 insertions(+), 22 deletions(-) diff --git a/core/src/main/scala/ox/resilience/CircuitBreaker.scala b/core/src/main/scala/ox/resilience/CircuitBreaker.scala index 167e32f0..f98c8338 100644 --- a/core/src/main/scala/ox/resilience/CircuitBreaker.scala +++ b/core/src/main/scala/ox/resilience/CircuitBreaker.scala @@ -40,8 +40,8 @@ private case class CircuitBreakerStateMachineConfig( private object CircuitBreakerStateMachineConfig: def fromConfig(c: CircuitBreakerConfig): CircuitBreakerStateMachineConfig = CircuitBreakerStateMachineConfig( - failureRateThreshold = 
c.failureRateThreshold, - slowCallThreshold = c.slowCallThreshold, + failureRateThreshold = c.failureRateThreshold.toInt, + slowCallThreshold = c.slowCallThreshold.toInt, slowCallDurationThreshold = c.slowCallDurationThreshold, minimumNumberOfCalls = c.minimumNumberOfCalls, numberOfCallsInHalfOpenState = c.numberOfCallsInHalfOpenState, diff --git a/core/src/main/scala/ox/resilience/CircuitBreakerConfig.scala b/core/src/main/scala/ox/resilience/CircuitBreakerConfig.scala index a73bbd5d..ce197b18 100644 --- a/core/src/main/scala/ox/resilience/CircuitBreakerConfig.scala +++ b/core/src/main/scala/ox/resilience/CircuitBreakerConfig.scala @@ -19,6 +19,16 @@ enum SlidingWindow: case TimeBased(duration: FiniteDuration) end SlidingWindow +/** Type representing percentage threshold between 0 and 100 */ +opaque type PercentageThreshold = Int + +extension (c: PercentageThreshold) def toInt: Int = c + +object PercentageThreshold: + def apply(c: Int): PercentageThreshold = + assert(c >= 0 && c <= 100, s"PercentageThreshold must be between 0 and 100, value: $c") + c + /** @param failureRateThreshold * threshold, as percentage of operations that ended in failure * @param slowCallThreshold @@ -38,8 +48,8 @@ end SlidingWindow * also maximum number of operations that can be started in half open state. 
*/ case class CircuitBreakerConfig( - failureRateThreshold: Int, - slowCallThreshold: Int, + failureRateThreshold: PercentageThreshold, + slowCallThreshold: PercentageThreshold, slowCallDurationThreshold: FiniteDuration, slidingWindow: SlidingWindow, minimumNumberOfCalls: Int, @@ -47,14 +57,7 @@ case class CircuitBreakerConfig( halfOpenTimeoutDuration: FiniteDuration, numberOfCallsInHalfOpenState: Int ): - assert( - failureRateThreshold >= 0 && failureRateThreshold <= 100, - s"failureRateThreshold must be between 0 and 100, value: $failureRateThreshold" - ) - assert( - slowCallThreshold >= 0 && slowCallThreshold <= 100, - s"slowCallThreshold must be between 0 and 100, value: $slowCallThreshold" - ) + assert( numberOfCallsInHalfOpenState > 0, s"numberOfCallsInHalfOpenState must be greater than 0, value: $numberOfCallsInHalfOpenState" @@ -63,8 +66,8 @@ end CircuitBreakerConfig object CircuitBreakerConfig: def default: CircuitBreakerConfig = CircuitBreakerConfig( - failureRateThreshold = 50, - slowCallThreshold = 50, + failureRateThreshold = PercentageThreshold(50), + slowCallThreshold = PercentageThreshold(50), slowCallDurationThreshold = 60.seconds, slidingWindow = SlidingWindow.CountBased(100), minimumNumberOfCalls = 20, diff --git a/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala b/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala index 6b2b4793..c0fa7653 100644 --- a/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala +++ b/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala @@ -5,7 +5,6 @@ import ox.* import java.util.concurrent.Semaphore import ox.channels.ActorRef import ox.resilience.CircuitBreakerStateMachine.nextState -import scala.compiletime.ops.double private[resilience] case class CircuitBreakerStateMachine( config: CircuitBreakerStateMachineConfig, diff --git a/core/src/test/scala/ox/resilience/CircuitBreakerStateMachineTest.scala 
b/core/src/test/scala/ox/resilience/CircuitBreakerStateMachineTest.scala index 0be575b5..acfd7402 100644 --- a/core/src/test/scala/ox/resilience/CircuitBreakerStateMachineTest.scala +++ b/core/src/test/scala/ox/resilience/CircuitBreakerStateMachineTest.scala @@ -152,8 +152,8 @@ class CircuitBreakerStateMachineTest extends AnyFlatSpec with Matchers: private val defaultConfig: CircuitBreakerConfig = CircuitBreakerConfig( - failureRateThreshold = 50, - slowCallThreshold = 50, + failureRateThreshold = PercentageThreshold(50), + slowCallThreshold = PercentageThreshold(50), slowCallDurationThreshold = 60.seconds, slidingWindow = SlidingWindow.CountBased(100), minimumNumberOfCalls = 20, diff --git a/core/src/test/scala/ox/resilience/CircuitBreakerTest.scala b/core/src/test/scala/ox/resilience/CircuitBreakerTest.scala index e0955389..1c6ba8e0 100644 --- a/core/src/test/scala/ox/resilience/CircuitBreakerTest.scala +++ b/core/src/test/scala/ox/resilience/CircuitBreakerTest.scala @@ -12,7 +12,7 @@ class CircuitBreakerTest extends AnyFlatSpec with Matchers with OptionValues wit it should "run operation when metrics are not exceeded" in supervised { // given - val thresholdRate = 100 + val thresholdRate = PercentageThreshold(100) val numberOfOperations = 2 val circuitBreaker = CircuitBreaker( CircuitBreakerConfig.default.copy( @@ -42,7 +42,7 @@ class CircuitBreakerTest extends AnyFlatSpec with Matchers with OptionValues wit it should "drop operation after exceeding fauilure threshold" in supervised { // given - val thresholdRate = 100 + val thresholdRate = PercentageThreshold(100) val numberOfOperations = 1 val circuitBreaker = CircuitBreaker( CircuitBreakerConfig.default.copy( @@ -67,7 +67,7 @@ class CircuitBreakerTest extends AnyFlatSpec with Matchers with OptionValues wit it should "drop operation after exceeding slow call threshold" in supervised { // given - val thresholdRate = 100 + val thresholdRate = PercentageThreshold(100) val numberOfOperations = 1 val circuitBreaker 
= CircuitBreaker( CircuitBreakerConfig.default.copy( @@ -96,7 +96,7 @@ class CircuitBreakerTest extends AnyFlatSpec with Matchers with OptionValues wit it should "switch to halfopen after configured time" in supervised { // given - val thresholdRate = 100 + val thresholdRate = PercentageThreshold(100) val numberOfOperations = 1 val circuitBreaker = CircuitBreaker( CircuitBreakerConfig.default.copy( @@ -125,7 +125,7 @@ class CircuitBreakerTest extends AnyFlatSpec with Matchers with OptionValues wit it should "switch back to open after configured timeout in half open state" in supervised { // given - val thresholdRate = 100 + val thresholdRate = PercentageThreshold(100) val numberOfOperations = 1 val circuitBreaker = CircuitBreaker( CircuitBreakerConfig.default.copy( From 55afda2492b67d46ff8491ff16607354b055b05d Mon Sep 17 00:00:00 2001 From: adamw <adam@warski.org> Date: Tue, 21 Jan 2025 14:23:38 +0100 Subject: [PATCH 16/26] Try to fix tests --- .../main/scala/ox/resilience/CircuitBreakerStateMachine.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala b/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala index c0fa7653..2672f8bd 100644 --- a/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala +++ b/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala @@ -77,13 +77,13 @@ private[resilience] object CircuitBreakerStateMachine: else CircuitBreakerState.Open(currentTimestamp) else CircuitBreakerState.Closed case CircuitBreakerState.Open(since) => - val timePassed = (currentTimestamp - since) > config.waitDurationOpenState.toMillis + val timePassed = (currentTimestamp - since) >= config.waitDurationOpenState.toMillis if timePassed || config.waitDurationOpenState.toMillis == 0 then CircuitBreakerState.HalfOpen(currentTimestamp, Semaphore(config.numberOfCallsInHalfOpenState)) else CircuitBreakerState.Open(since) case 
CircuitBreakerState.HalfOpen(since, semaphore, completedCalls) => lazy val allCallsInHalfOpenCompleted = completedCalls >= config.numberOfCallsInHalfOpenState - lazy val timePassed = (currentTimestamp - since) > config.halfOpenTimeoutDuration.toMillis + lazy val timePassed = (currentTimestamp - since) >= config.halfOpenTimeoutDuration.toMillis // if we didn't complete all half open calls but timeout is reached go back to open if !allCallsInHalfOpenCompleted && config.halfOpenTimeoutDuration.toMillis != 0 && timePassed then CircuitBreakerState.Open(currentTimestamp) From 6e34c70021c23b82750ca3cf8b6639d520816509 Mon Sep 17 00:00:00 2001 From: adamw <adam@warski.org> Date: Tue, 21 Jan 2025 14:43:34 +0100 Subject: [PATCH 17/26] Fix helper method conflict --- .../test/scala/ox/kafka/manual/pekko/publishPekko.scala | 4 ++-- .../test/scala/ox/kafka/manual/pekko/transferPekko.scala | 4 ++-- kafka/src/test/scala/ox/kafka/manual/publish.scala | 2 +- kafka/src/test/scala/ox/kafka/manual/transfer.scala | 2 +- kafka/src/test/scala/ox/kafka/manual/util.scala | 9 ++++----- 5 files changed, 10 insertions(+), 11 deletions(-) diff --git a/kafka/src/test/scala/ox/kafka/manual/pekko/publishPekko.scala b/kafka/src/test/scala/ox/kafka/manual/pekko/publishPekko.scala index adf48e12..92f659e4 100644 --- a/kafka/src/test/scala/ox/kafka/manual/pekko/publishPekko.scala +++ b/kafka/src/test/scala/ox/kafka/manual/pekko/publishPekko.scala @@ -7,14 +7,14 @@ import org.apache.pekko.kafka.ProducerSettings import org.apache.pekko.kafka.scaladsl.Producer import org.apache.pekko.stream.scaladsl.Source import ox.{discard, get} -import ox.kafka.manual.{randomString, timed} +import ox.kafka.manual.{randomString, timedAndLogged} import scala.concurrent.Await import scala.concurrent.duration.Duration @main def publishPekko(): Unit = val topic = "t2" - timed("publish-pekko") { + timedAndLogged("publish-pekko") { given system: ActorSystem = ActorSystem("publish") val producerSettings = 
ProducerSettings(system, new StringSerializer, new StringSerializer).withBootstrapServers("localhost:29092") diff --git a/kafka/src/test/scala/ox/kafka/manual/pekko/transferPekko.scala b/kafka/src/test/scala/ox/kafka/manual/pekko/transferPekko.scala index 48350911..00de76b9 100644 --- a/kafka/src/test/scala/ox/kafka/manual/pekko/transferPekko.scala +++ b/kafka/src/test/scala/ox/kafka/manual/pekko/transferPekko.scala @@ -8,7 +8,7 @@ import org.apache.pekko.kafka.scaladsl.Consumer.DrainingControl import org.apache.pekko.kafka.scaladsl.{Committer, Consumer, Producer} import org.apache.pekko.kafka.{CommitterSettings, ConsumerSettings, ProducerMessage, ProducerSettings, Subscriptions} import ox.{discard, get} -import ox.kafka.manual.timed +import ox.kafka.manual.timedAndLogged import scala.concurrent.Await import scala.concurrent.duration.Duration @@ -18,7 +18,7 @@ import scala.concurrent.duration.Duration val destTopic = "t2mapped" val group = "group2" - timed("transfer-pekko") { + timedAndLogged("transfer-pekko") { given system: ActorSystem = ActorSystem("transfer") val producerSettings = ProducerSettings(system, new StringSerializer, new StringSerializer).withBootstrapServers("localhost:29092") diff --git a/kafka/src/test/scala/ox/kafka/manual/publish.scala b/kafka/src/test/scala/ox/kafka/manual/publish.scala index 6038f1e2..5ffd4f97 100644 --- a/kafka/src/test/scala/ox/kafka/manual/publish.scala +++ b/kafka/src/test/scala/ox/kafka/manual/publish.scala @@ -9,7 +9,7 @@ import ox.flow.Flow @main def publish(): Unit = val topic = "t1" - timed("publish") { + timedAndLogged("publish") { import KafkaStage.* val bootstrapServer = "localhost:29092" diff --git a/kafka/src/test/scala/ox/kafka/manual/transfer.scala b/kafka/src/test/scala/ox/kafka/manual/transfer.scala index c14cc54e..2e70b0b1 100644 --- a/kafka/src/test/scala/ox/kafka/manual/transfer.scala +++ b/kafka/src/test/scala/ox/kafka/manual/transfer.scala @@ -11,7 +11,7 @@ import 
ox.kafka.ConsumerSettings.AutoOffsetReset val destTopic = "t1mapped" val group = "group1" - timed("transfer") { + timedAndLogged("transfer") { import KafkaStage.* val bootstrapServer = "localhost:29092" diff --git a/kafka/src/test/scala/ox/kafka/manual/util.scala b/kafka/src/test/scala/ox/kafka/manual/util.scala index 5a797e29..5441b20e 100644 --- a/kafka/src/test/scala/ox/kafka/manual/util.scala +++ b/kafka/src/test/scala/ox/kafka/manual/util.scala @@ -1,12 +1,11 @@ package ox.kafka.manual import scala.util.Random +import ox.timed -def timed[T](name: String)(f: => T): T = - val start = System.currentTimeMillis() - val result = f - val end = System.currentTimeMillis() - println(s"$name took ${end - start}ms") +def timedAndLogged[T](name: String)(f: => T): T = + val (took, result) = timed(f) + println(s"$name took ${took.toMillis}ms") result def randomString() = Random().alphanumeric.take(100).mkString From 09cc330f8768c8daa4d984ad0cdaee2dc04fea90 Mon Sep 17 00:00:00 2001 From: adamw <adam@warski.org> Date: Wed, 22 Jan 2025 08:58:22 +0100 Subject: [PATCH 18/26] Failing test --- .../ox/resilience/CircuitBreakerTest.scala | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/core/src/test/scala/ox/resilience/CircuitBreakerTest.scala b/core/src/test/scala/ox/resilience/CircuitBreakerTest.scala index 1c6ba8e0..4f18d28e 100644 --- a/core/src/test/scala/ox/resilience/CircuitBreakerTest.scala +++ b/core/src/test/scala/ox/resilience/CircuitBreakerTest.scala @@ -153,4 +153,56 @@ class CircuitBreakerTest extends AnyFlatSpec with Matchers with OptionValues wit stateAfterWait shouldBe a[CircuitBreakerState.Open] } + it should "correctly transitions through states when there are concurrently running operations" in supervised { + // given + val circuitBreaker = CircuitBreaker( + CircuitBreakerConfig.default.copy( + failureRateThreshold = PercentageThreshold(100), + minimumNumberOfCalls = 1, + slidingWindow = SlidingWindow.TimeBased(2.seconds), + 
numberOfCallsInHalfOpenState = 1, + waitDurationOpenState = 1.second, + halfOpenTimeoutDuration = 1.second + ) + ) + + // when + + // concurrently, run two failing operations + forkDiscard { + circuitBreaker.runOrDropEither { + sleep(500.millis) + Left("a") + } + } + forkDiscard { + circuitBreaker.runOrDropEither { + sleep(1.second) + Left("b") + } + } + + // then + + // 250ms: no operations complete yet, should be closed + sleep(250.millis) + circuitBreaker.stateMachine.state shouldBe CircuitBreakerState.Closed + + // 750ms: the first operation failed, should be open + sleep(500.millis) + circuitBreaker.stateMachine.state shouldBe a[CircuitBreakerState.Open] + + // 1750ms: first operation failed more than 1s ago, second operation failed less than 1s ago; should we now go to half-open or stay at open? + sleep(1.second) + circuitBreaker.stateMachine.state shouldBe a[CircuitBreakerState.HalfOpen] + + // 2250ms: more than 1s after the last failing operation, should be now half-open + sleep(500.millis) + circuitBreaker.stateMachine.state shouldBe a[CircuitBreakerState.HalfOpen] + + // 3250ms: more than 2s after the last failing operation, should be closed + sleep(1.second) + circuitBreaker.stateMachine.state shouldBe CircuitBreakerState.Closed + } + end CircuitBreakerTest From 5be4f247f4a546a40588e4ba9ec8ea0f77c7d222 Mon Sep 17 00:00:00 2001 From: Kamil-Lontkowski <kamillont14@gmail.com> Date: Wed, 22 Jan 2025 14:12:20 +0100 Subject: [PATCH 19/26] fix edge case for last completed call in halfOpen state --- .../CircuitBreakerStateMachine.scala | 26 +++++++++++++------ .../CircuitBreakerStateMachineTest.scala | 19 +++++++++++++- 2 files changed, 36 insertions(+), 9 deletions(-) diff --git a/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala b/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala index 2672f8bd..287d0905 100644 --- a/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala +++ 
b/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala @@ -15,14 +15,16 @@ private[resilience] case class CircuitBreakerStateMachine( def state: CircuitBreakerState = _state def registerResult(result: CircuitBreakerResult, acquired: AcquireResult, selfRef: ActorRef[CircuitBreakerStateMachine]): Unit = + println(s"Result: $result, $acquired, $_state, ${System.currentTimeMillis()}") results.updateResults(result) updateState(selfRef, Some(acquired)) end registerResult def updateState(selfRef: ActorRef[CircuitBreakerStateMachine], acquiredResult: Option[AcquireResult] = None): Unit = val oldState = _state - val newState = nextState(results.calculateMetrics(None, System.currentTimeMillis()), oldState, config) + val newState = nextState(results.calculateMetrics(acquiredResult, System.currentTimeMillis()), oldState, config) _state = newState + println(s"${(oldState, newState)}, ${System.currentTimeMillis()}") scheduleCallback(oldState, newState, selfRef) results.onStateChange(oldState, newState) @@ -32,10 +34,13 @@ private[resilience] case class CircuitBreakerStateMachine( selfRef: ActorRef[CircuitBreakerStateMachine] ): Unit = (oldState, newState) match - case (CircuitBreakerState.Closed, CircuitBreakerState.Open(_)) => + case (_, CircuitBreakerState.Open(_)) => // schedule switch to halfOpen after timeout updateAfter(config.waitDurationOpenState, selfRef) - case (CircuitBreakerState.Open(_), CircuitBreakerState.HalfOpen(since, semaphore, completedOperations)) => + case ( + CircuitBreakerState.Open(_) | CircuitBreakerState.Closed, + CircuitBreakerState.HalfOpen(since, semaphore, completedOperations) + ) => // schedule timeout for halfOpen state if is not 0 if config.halfOpenTimeoutDuration.toMillis != 0 then updateAfter(config.halfOpenTimeoutDuration, selfRef) case _ => () @@ -43,6 +48,7 @@ private[resilience] case class CircuitBreakerStateMachine( private def updateAfter(after: FiniteDuration, actorRef: ActorRef[CircuitBreakerStateMachine]): Unit = 
forkDiscard: sleep(after) + println(s"Scheduled - ${System.currentTimeMillis()}") actorRef.tell(_.updateState(actorRef)) end CircuitBreakerStateMachine @@ -66,7 +72,10 @@ private[resilience] object CircuitBreakerStateMachine: def nextState(metrics: Metrics, currentState: CircuitBreakerState, config: CircuitBreakerStateMachineConfig): CircuitBreakerState = val currentTimestamp = metrics.timestamp - val lastAcquireResult = metrics.lastAcquisitionResult.filter(_.acquired) + // We want to know if last result should be added to completed calls in halfOpen state + val lastCompletedCall = metrics.lastAcquisitionResult match + case Some(AcquireResult(true, CircuitBreakerState.HalfOpen(s, sem, completed))) => 1 + case _ => 0 val exceededThreshold = (metrics.failureRate >= config.failureRateThreshold || metrics.slowCallsRate >= config.slowCallThreshold) val minCallsRecorder = metrics.operationsInWindow >= config.minimumNumberOfCalls currentState match @@ -82,7 +91,7 @@ private[resilience] object CircuitBreakerStateMachine: CircuitBreakerState.HalfOpen(currentTimestamp, Semaphore(config.numberOfCallsInHalfOpenState)) else CircuitBreakerState.Open(since) case CircuitBreakerState.HalfOpen(since, semaphore, completedCalls) => - lazy val allCallsInHalfOpenCompleted = completedCalls >= config.numberOfCallsInHalfOpenState + lazy val allCallsInHalfOpenCompleted = (completedCalls + lastCompletedCall) >= config.numberOfCallsInHalfOpenState lazy val timePassed = (currentTimestamp - since) >= config.halfOpenTimeoutDuration.toMillis // if we didn't complete all half open calls but timeout is reached go back to open if !allCallsInHalfOpenCompleted && config.halfOpenTimeoutDuration.toMillis != 0 && timePassed then @@ -93,9 +102,10 @@ private[resilience] object CircuitBreakerStateMachine: else if allCallsInHalfOpenCompleted && exceededThreshold then CircuitBreakerState.Open(currentTimestamp) // We didn't complete all half open calls, keep halfOpen else - lastAcquireResult match - case 
Some(AcquireResult(true, CircuitBreakerState.HalfOpen(s, sem, completed))) => - CircuitBreakerState.HalfOpen(s, sem, completed + 1) + metrics.lastAcquisitionResult match + case Some(AcquireResult(true, CircuitBreakerState.HalfOpen(s, _, _))) + if s == since => // Check if this is the same HalfOpen state + CircuitBreakerState.HalfOpen(since, semaphore, completedCalls + 1) case _ => CircuitBreakerState.HalfOpen(since, semaphore, completedCalls) end if end match diff --git a/core/src/test/scala/ox/resilience/CircuitBreakerStateMachineTest.scala b/core/src/test/scala/ox/resilience/CircuitBreakerStateMachineTest.scala index acfd7402..9a37a723 100644 --- a/core/src/test/scala/ox/resilience/CircuitBreakerStateMachineTest.scala +++ b/core/src/test/scala/ox/resilience/CircuitBreakerStateMachineTest.scala @@ -110,7 +110,7 @@ class CircuitBreakerStateMachineTest extends AnyFlatSpec with Matchers: resultingState shouldBe CircuitBreakerState.Closed } - it should "go to open after enough calls with bad metrics are recorded" in supervised { + it should "go to open after enough calls with bad metrics are recorded in halfOpen state" in supervised { // given val config = defaultConfig val stateMachine = CircuitBreakerStateMachine(config) @@ -127,6 +127,23 @@ class CircuitBreakerStateMachineTest extends AnyFlatSpec with Matchers: resultingState shouldBe a[CircuitBreakerState.Open] } + it should "go to closed after enough calls with good metrics are recorded in halfOpen state" in supervised { + // given + val config = defaultConfig + val stateMachine = CircuitBreakerStateMachine(config) + val completedCalls = config.numberOfCallsInHalfOpenState - 1 + val timestamp = System.currentTimeMillis() + val state = CircuitBreakerState.HalfOpen(timestamp, Semaphore(0), completedCalls) + val lastResult: Option[AcquireResult] = Some(AcquireResult(true, state)) + val metrics = Metrics(hundredPercentSuccessRate, hundredPercentSuccessRate, config.numberOfCallsInHalfOpenState, lastResult, timestamp) 
+ + // when + val resultingState = CircuitBreakerStateMachine.nextState(metrics, state, stateMachine.config) + + // then + resultingState shouldBe CircuitBreakerState.Closed + } + it should "go to half open after waitDurationOpenState passes" in supervised { // given val config = defaultConfig From 91399826641936f2ea151c36e12855dac9337a15 Mon Sep 17 00:00:00 2001 From: Kamil-Lontkowski <kamillont14@gmail.com> Date: Wed, 22 Jan 2025 14:23:34 +0100 Subject: [PATCH 20/26] Add test case for wrong calculation of metrics --- .../CircuitBreakerStateMachine.scala | 3 - .../ox/resilience/CircuitBreakerTest.scala | 58 ++++++++++++++++++- 2 files changed, 56 insertions(+), 5 deletions(-) diff --git a/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala b/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala index 287d0905..60849204 100644 --- a/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala +++ b/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala @@ -15,7 +15,6 @@ private[resilience] case class CircuitBreakerStateMachine( def state: CircuitBreakerState = _state def registerResult(result: CircuitBreakerResult, acquired: AcquireResult, selfRef: ActorRef[CircuitBreakerStateMachine]): Unit = - println(s"Result: $result, $acquired, $_state, ${System.currentTimeMillis()}") results.updateResults(result) updateState(selfRef, Some(acquired)) end registerResult @@ -24,7 +23,6 @@ private[resilience] case class CircuitBreakerStateMachine( val oldState = _state val newState = nextState(results.calculateMetrics(acquiredResult, System.currentTimeMillis()), oldState, config) _state = newState - println(s"${(oldState, newState)}, ${System.currentTimeMillis()}") scheduleCallback(oldState, newState, selfRef) results.onStateChange(oldState, newState) @@ -48,7 +46,6 @@ private[resilience] case class CircuitBreakerStateMachine( private def updateAfter(after: FiniteDuration, actorRef: ActorRef[CircuitBreakerStateMachine]): Unit = 
forkDiscard: sleep(after) - println(s"Scheduled - ${System.currentTimeMillis()}") actorRef.tell(_.updateState(actorRef)) end CircuitBreakerStateMachine diff --git a/core/src/test/scala/ox/resilience/CircuitBreakerTest.scala b/core/src/test/scala/ox/resilience/CircuitBreakerTest.scala index 4f18d28e..858e5a72 100644 --- a/core/src/test/scala/ox/resilience/CircuitBreakerTest.scala +++ b/core/src/test/scala/ox/resilience/CircuitBreakerTest.scala @@ -192,7 +192,7 @@ class CircuitBreakerTest extends AnyFlatSpec with Matchers with OptionValues wit sleep(500.millis) circuitBreaker.stateMachine.state shouldBe a[CircuitBreakerState.Open] - // 1750ms: first operation failed more than 1s ago, second operation failed less than 1s ago; should we now go to half-open or stay at open? + // 1750ms: first operation failed more than 1s ago, second operation failed less than 1s ago and was ignored sleep(1.second) circuitBreaker.stateMachine.state shouldBe a[CircuitBreakerState.HalfOpen] @@ -200,9 +200,63 @@ class CircuitBreakerTest extends AnyFlatSpec with Matchers with OptionValues wit sleep(500.millis) circuitBreaker.stateMachine.state shouldBe a[CircuitBreakerState.HalfOpen] - // 3250ms: more than 2s after the last failing operation, should be closed + // 3250ms: at 2500ms 1 sec timeout on halfOpen state passes, we go back to open sleep(1.second) + circuitBreaker.stateMachine.state shouldBe a[CircuitBreakerState.Open] + + // 3750ms: at 3500ms we go to halfOpen again + sleep(1000.millis) + circuitBreaker.stateMachine.state shouldBe a[CircuitBreakerState.HalfOpen] + } + + it should "correctly calculate metrics when results come in after state change" in supervised { + // given + val circuitBreaker = CircuitBreaker( + CircuitBreakerConfig.default.copy( + failureRateThreshold = PercentageThreshold(50), + minimumNumberOfCalls = 1, + slidingWindow = SlidingWindow.TimeBased(4.seconds), + numberOfCallsInHalfOpenState = 1, + waitDurationOpenState = 1.second, + halfOpenTimeoutDuration = 
1.second + ) + ) + + // when + + // concurrently, run two failing operations + forkDiscard { + circuitBreaker.runOrDropEither { + sleep(500.millis) + Left("a") + } + } + forkDiscard { + circuitBreaker.runOrDropEither { + sleep(2.second) + Left("b") + } + } + + // then + + // 250ms: no operations complete yet, should be closed + sleep(250.millis) circuitBreaker.stateMachine.state shouldBe CircuitBreakerState.Closed + + // 750ms: the first operation failed, should be open + sleep(500.millis) + circuitBreaker.stateMachine.state shouldBe a[CircuitBreakerState.Open] + + // 1750ms: first operation failed more than 1s ago, second operation failed less than 1s ago and was ignored + sleep(1.second) + circuitBreaker.stateMachine.state shouldBe a[CircuitBreakerState.HalfOpen] + + // 2250ms: complete enough operations for halfOpen state - since success should switch back to Closed + sleep(500.millis) + circuitBreaker.runOrDropEither(Right("c")).discard + sleep(100.millis) // wait for state to register + circuitBreaker.stateMachine.state shouldBe CircuitBreakerState.Closed // Should go back to closed, we have one succesful operation } end CircuitBreakerTest From 24e70b7e278b993c52d30d6385c3a21f4718930b Mon Sep 17 00:00:00 2001 From: Kamil-Lontkowski <kamillont14@gmail.com> Date: Wed, 22 Jan 2025 14:39:28 +0100 Subject: [PATCH 21/26] Don't count metrics registered with different state, fix test --- .../scala/ox/resilience/CircuitBreaker.scala | 4 +-- .../CircuitBreakerStateMachine.scala | 31 ++++++++++++------- .../CircuitBreakerStateMachineTest.scala | 21 +++++++------ .../ox/resilience/CircuitBreakerTest.scala | 7 +++-- 4 files changed, 37 insertions(+), 26 deletions(-) diff --git a/core/src/main/scala/ox/resilience/CircuitBreaker.scala b/core/src/main/scala/ox/resilience/CircuitBreaker.scala index f98c8338..c430dd31 100644 --- a/core/src/main/scala/ox/resilience/CircuitBreaker.scala +++ b/core/src/main/scala/ox/resilience/CircuitBreaker.scala @@ -10,7 +10,7 @@ import
scala.util.Try private[resilience] enum CircuitBreakerState: case Open(since: Long) - case Closed + case Closed(since: Long) case HalfOpen(since: Long, semaphore: Semaphore, completedOperations: Int = 0) private[resilience] enum CircuitBreakerResult: @@ -58,7 +58,7 @@ case class CircuitBreaker(config: CircuitBreakerConfig)(using Ox): private val actorRef: ActorRef[CircuitBreakerStateMachine] = Actor.create(stateMachine)(using sc = BufferCapacity.apply(100)) private def tryAcquire: AcquireResult = stateMachine.state match - case CircuitBreakerState.Closed => AcquireResult(true, CircuitBreakerState.Closed) + case currState @ CircuitBreakerState.Closed(_) => AcquireResult(true, currState) case currState @ CircuitBreakerState.Open(_) => AcquireResult(false, currState) case currState @ CircuitBreakerState.HalfOpen(_, semaphore, _) => AcquireResult(semaphore.tryAcquire(1), currState) diff --git a/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala b/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala index 60849204..faf7e82b 100644 --- a/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala +++ b/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala @@ -10,12 +10,19 @@ private[resilience] case class CircuitBreakerStateMachine( config: CircuitBreakerStateMachineConfig, private val results: CircuitBreakerResults )(using val ox: Ox): - @volatile private var _state: CircuitBreakerState = CircuitBreakerState.Closed + @volatile private var _state: CircuitBreakerState = CircuitBreakerState.Closed(System.currentTimeMillis()) def state: CircuitBreakerState = _state def registerResult(result: CircuitBreakerResult, acquired: AcquireResult, selfRef: ActorRef[CircuitBreakerStateMachine]): Unit = - results.updateResults(result) + // We check that result was acquired in the same state that we are currently in + val isResultFromCurrentState = (acquired.circuitState, _state) match + case (CircuitBreakerState.Open(sinceOpen), 
CircuitBreakerState.Open(since)) if since == sinceOpen => true + case (CircuitBreakerState.HalfOpen(sinceHalfOpen, _, _), CircuitBreakerState.HalfOpen(since, _, _)) if sinceHalfOpen == since => true + case (CircuitBreakerState.Closed(sinceClosed), CircuitBreakerState.Closed(since)) if sinceClosed == since => true + case _ => false + // If acquired in different state we don't update results + if isResultFromCurrentState then results.updateResults(result) updateState(selfRef, Some(acquired)) end registerResult @@ -36,7 +43,7 @@ private[resilience] case class CircuitBreakerStateMachine( // schedule switch to halfOpen after timeout updateAfter(config.waitDurationOpenState, selfRef) case ( - CircuitBreakerState.Open(_) | CircuitBreakerState.Closed, + CircuitBreakerState.Open(_) | CircuitBreakerState.Closed(_), CircuitBreakerState.HalfOpen(since, semaphore, completedOperations) ) => // schedule timeout for halfOpen state if is not 0 @@ -76,12 +83,12 @@ private[resilience] object CircuitBreakerStateMachine: val exceededThreshold = (metrics.failureRate >= config.failureRateThreshold || metrics.slowCallsRate >= config.slowCallThreshold) val minCallsRecorder = metrics.operationsInWindow >= config.minimumNumberOfCalls currentState match - case CircuitBreakerState.Closed => + case CircuitBreakerState.Closed(since) => if minCallsRecorder && exceededThreshold then if config.waitDurationOpenState.toMillis == 0 then CircuitBreakerState.HalfOpen(currentTimestamp, Semaphore(config.numberOfCallsInHalfOpenState)) else CircuitBreakerState.Open(currentTimestamp) - else CircuitBreakerState.Closed + else CircuitBreakerState.Closed(since) case CircuitBreakerState.Open(since) => val timePassed = (currentTimestamp - since) >= config.waitDurationOpenState.toMillis if timePassed || config.waitDurationOpenState.toMillis == 0 then @@ -94,7 +101,7 @@ private[resilience] object CircuitBreakerStateMachine: if !allCallsInHalfOpenCompleted && config.halfOpenTimeoutDuration.toMillis != 0 && timePassed 
then CircuitBreakerState.Open(currentTimestamp) // If halfOpen calls were completed && rates are below we close breaker - else if allCallsInHalfOpenCompleted && !exceededThreshold then CircuitBreakerState.Closed + else if allCallsInHalfOpenCompleted && !exceededThreshold then CircuitBreakerState.Closed(currentTimestamp) // If halfOpen calls completed, but rates are still above go back to open else if allCallsInHalfOpenCompleted && exceededThreshold then CircuitBreakerState.Open(currentTimestamp) // We didn't complete all half open calls, keep halfOpen @@ -132,11 +139,11 @@ private[resilience] object CircuitBreakerResults: import CircuitBreakerState.* // we have to match so we don't reset result when for example incrementing completed calls in halfopen state (oldState, newState) match - case (Closed, Open(_) | HalfOpen(_, _, _)) => + case (Closed(_), Open(_) | HalfOpen(_, _, _)) => clearResults - case (HalfOpen(_, _, _), Open(_) | Closed) => + case (HalfOpen(_, _, _), Open(_) | Closed(_)) => clearResults - case (Open(_), Closed | HalfOpen(_, _, _)) => + case (Open(_), Closed(_) | HalfOpen(_, _, _)) => clearResults case (_, _) => () end match @@ -215,11 +222,11 @@ private[resilience] object CircuitBreakerResults: import CircuitBreakerState.* // we have to match so we don't reset result when for example incrementing completed calls in halfopen state (oldState, newState) match - case (Closed, Open(_) | HalfOpen(_, _, _)) => + case (Closed(_), Open(_) | HalfOpen(_, _, _)) => clearResults() - case (HalfOpen(_, _, _), Open(_) | Closed) => + case (HalfOpen(_, _, _), Open(_) | Closed(_)) => clearResults() - case (Open(_), Closed | HalfOpen(_, _, _)) => + case (Open(_), Closed(_) | HalfOpen(_, _, _)) => clearResults() case (_, _) => () end match diff --git a/core/src/test/scala/ox/resilience/CircuitBreakerStateMachineTest.scala b/core/src/test/scala/ox/resilience/CircuitBreakerStateMachineTest.scala index 9a37a723..ef87d83e 100644 --- 
a/core/src/test/scala/ox/resilience/CircuitBreakerStateMachineTest.scala +++ b/core/src/test/scala/ox/resilience/CircuitBreakerStateMachineTest.scala @@ -15,25 +15,27 @@ class CircuitBreakerStateMachineTest extends AnyFlatSpec with Matchers: // given val config = defaultConfig val stateMachine = CircuitBreakerStateMachine(config) + val currentTimstamp = System.currentTimeMillis() val lastResult: Option[AcquireResult] = None val metrics = - Metrics(hundredPercentSuccessRate, hundredPercentSuccessRate, config.minimumNumberOfCalls, lastResult, System.currentTimeMillis()) + Metrics(hundredPercentSuccessRate, hundredPercentSuccessRate, config.minimumNumberOfCalls, lastResult, currentTimstamp) // when - val resultingState = CircuitBreakerStateMachine.nextState(metrics, CircuitBreakerState.Closed, stateMachine.config) + val resultingState = CircuitBreakerStateMachine.nextState(metrics, CircuitBreakerState.Closed(currentTimstamp), stateMachine.config) - resultingState shouldBe CircuitBreakerState.Closed + resultingState shouldBe a[CircuitBreakerState.Closed] } it should "go to open after surpasing failure threshold" in supervised { // given val config = defaultConfig val stateMachine = CircuitBreakerStateMachine(config) + val currentTimstamp = System.currentTimeMillis() val lastResult: Option[AcquireResult] = None - val metrics = Metrics(badFailureRate, hundredPercentSuccessRate, config.minimumNumberOfCalls, lastResult, System.currentTimeMillis()) + val metrics = Metrics(badFailureRate, hundredPercentSuccessRate, config.minimumNumberOfCalls, lastResult, currentTimstamp) // when - val resultingState = CircuitBreakerStateMachine.nextState(metrics, CircuitBreakerState.Closed, stateMachine.config) + val resultingState = CircuitBreakerStateMachine.nextState(metrics, CircuitBreakerState.Closed(currentTimstamp), stateMachine.config) // then resultingState shouldBe a[CircuitBreakerState.Open] @@ -43,11 +45,12 @@ class CircuitBreakerStateMachineTest extends AnyFlatSpec with 
Matchers: // given val config = defaultConfig.copy(waitDurationOpenState = FiniteDuration(0, TimeUnit.MILLISECONDS)) val stateMachine = CircuitBreakerStateMachine(config) + val currentTimeStamp = System.currentTimeMillis() val lastResult: Option[AcquireResult] = None - val metrics = Metrics(badFailureRate, hundredPercentSuccessRate, config.minimumNumberOfCalls, lastResult, System.currentTimeMillis()) + val metrics = Metrics(badFailureRate, hundredPercentSuccessRate, config.minimumNumberOfCalls, lastResult, currentTimeStamp) // when - val resultingState = CircuitBreakerStateMachine.nextState(metrics, CircuitBreakerState.Closed, stateMachine.config) + val resultingState = CircuitBreakerStateMachine.nextState(metrics, CircuitBreakerState.Closed(currentTimeStamp), stateMachine.config) // then resultingState shouldBe a[CircuitBreakerState.HalfOpen] @@ -107,7 +110,7 @@ class CircuitBreakerStateMachineTest extends AnyFlatSpec with Matchers: val resultingState = CircuitBreakerStateMachine.nextState(metrics, state, stateMachine.config) // then - resultingState shouldBe CircuitBreakerState.Closed + resultingState shouldBe a[CircuitBreakerState.Closed] } it should "go to open after enough calls with bad metrics are recorded in halfOpen state" in supervised { @@ -141,7 +144,7 @@ class CircuitBreakerStateMachineTest extends AnyFlatSpec with Matchers: val resultingState = CircuitBreakerStateMachine.nextState(metrics, state, stateMachine.config) // then - resultingState shouldBe CircuitBreakerState.Closed + resultingState shouldBe a[CircuitBreakerState.Closed] } it should "go to half open after waitDurationOpenState passes" in supervised { diff --git a/core/src/test/scala/ox/resilience/CircuitBreakerTest.scala b/core/src/test/scala/ox/resilience/CircuitBreakerTest.scala index 858e5a72..92de4e2b 100644 --- a/core/src/test/scala/ox/resilience/CircuitBreakerTest.scala +++ b/core/src/test/scala/ox/resilience/CircuitBreakerTest.scala @@ -186,7 +186,7 @@ class CircuitBreakerTest 
extends AnyFlatSpec with Matchers with OptionValues wit // 250ms: no operations complete yet, should be closed sleep(250.millis) - circuitBreaker.stateMachine.state shouldBe CircuitBreakerState.Closed + circuitBreaker.stateMachine.state shouldBe a[CircuitBreakerState.Closed] // 750ms: the first operation failed, should be open sleep(500.millis) @@ -242,7 +242,7 @@ class CircuitBreakerTest extends AnyFlatSpec with Matchers with OptionValues wit // 250ms: no operations complete yet, should be closed sleep(250.millis) - circuitBreaker.stateMachine.state shouldBe CircuitBreakerState.Closed + circuitBreaker.stateMachine.state shouldBe a[CircuitBreakerState.Closed] // 750ms: the first operation failed, should be open sleep(500.millis) @@ -256,7 +256,8 @@ class CircuitBreakerTest extends AnyFlatSpec with Matchers with OptionValues wit sleep(500.millis) circuitBreaker.runOrDropEither(Right("c")).discard sleep(100.millis) // wait for state to register - circuitBreaker.stateMachine.state shouldBe CircuitBreakerState.Closed // Should go back to closed, we have one succesful operation + // Should go back to closed, we have one succesful operation + circuitBreaker.stateMachine.state shouldBe a[CircuitBreakerState.Closed] } end CircuitBreakerTest From cf7dceae5a46c8664efaaaf67dfdd2310b5e3c33 Mon Sep 17 00:00:00 2001 From: Kamil-Lontkowski <kamillont14@gmail.com> Date: Wed, 22 Jan 2025 16:45:10 +0100 Subject: [PATCH 22/26] docs --- doc/_static/state-diagram-cb.svg | 1 + doc/index.md | 1 + doc/utils/circuit-breaker.md | 48 ++++++++++++++++++++++++-------- 3 files changed, 38 insertions(+), 12 deletions(-) create mode 100644 doc/_static/state-diagram-cb.svg diff --git a/doc/_static/state-diagram-cb.svg b/doc/_static/state-diagram-cb.svg new file mode 100644 index 00000000..792b7545 --- /dev/null +++ b/doc/_static/state-diagram-cb.svg @@ -0,0 +1 @@ +<svg aria-roledescription="stateDiagram" role="graphics-document document" viewBox="0 0 364.51666259765625 169" style="max-width: 
364.51666259765625px;" class="statediagram" xmlns="http://www.w3.org/2000/svg" width="100%" id="export-svg"><style xmlns="http://www.w3.org/1999/xhtml">@import url("https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.2.0/css/all.min.css"); p {margin: 0;}</style><style>#export-svg{font-family:"trebuchet ms",verdana,arial,sans-serif;font-size:14px;fill:#333;}#export-svg .error-icon{fill:#ffffff;}#export-svg .error-text{fill:#000000;stroke:#000000;}#export-svg .edge-thickness-normal{stroke-width:1px;}#export-svg .edge-thickness-thick{stroke-width:3.5px;}#export-svg .edge-pattern-solid{stroke-dasharray:0;}#export-svg .edge-thickness-invisible{stroke-width:0;fill:none;}#export-svg .edge-pattern-dashed{stroke-dasharray:3;}#export-svg .edge-pattern-dotted{stroke-dasharray:2;}#export-svg .marker{fill:#000000;stroke:#000000;}#export-svg .marker.cross{stroke:#000000;}#export-svg svg{font-family:"trebuchet ms",verdana,arial,sans-serif;font-size:14px;}#export-svg p{margin:0;}#export-svg defs #statediagram-barbEnd{fill:#000000;stroke:#000000;}#export-svg g.stateGroup text{fill:#000000;stroke:none;font-size:10px;}#export-svg g.stateGroup text{fill:#333;stroke:none;font-size:10px;}#export-svg g.stateGroup .state-title{font-weight:bolder;fill:#333;}#export-svg g.stateGroup rect{fill:#ffffff;stroke:#000000;}#export-svg g.stateGroup line{stroke:#000000;stroke-width:1;}#export-svg .transition{stroke:#000000;stroke-width:1;fill:none;}#export-svg .stateGroup .composit{fill:#ffffff;border-bottom:1px;}#export-svg .stateGroup .alt-composit{fill:#e0e0e0;border-bottom:1px;}#export-svg .state-note{stroke:hsl(52.6829268293, 60%, 73.9215686275%);fill:#fff5ad;}#export-svg .state-note text{fill:#333;stroke:none;font-size:10px;}#export-svg .stateLabel .box{stroke:none;stroke-width:0;fill:#ffffff;opacity:0.5;}#export-svg .edgeLabel .label rect{fill:#ffffff;opacity:0.5;}#export-svg .edgeLabel{background-color:hsl(-120, 0%, 80%);text-align:center;}#export-svg .edgeLabel 
p{background-color:hsl(-120, 0%, 80%);}#export-svg .edgeLabel rect{opacity:0.5;background-color:hsl(-120, 0%, 80%);fill:hsl(-120, 0%, 80%);}#export-svg .edgeLabel .label text{fill:#333;}#export-svg .label div .edgeLabel{color:#333;}#export-svg .stateLabel text{fill:#333;font-size:10px;font-weight:bold;}#export-svg .node circle.state-start{fill:#000000;stroke:#000000;}#export-svg .node .fork-join{fill:#000000;stroke:#000000;}#export-svg .node circle.state-end{fill:#ffffff;stroke:#ffffff;stroke-width:1.5;}#export-svg [data-look="neo"].node circle.state-end{filter:none;stroke:#ffffff;fill:#000000;}#export-svg .end-state-inner{fill:#ffffff;stroke:#ffffff;stroke-width:1.5;}#export-svg .node rect{fill:#ffffff;stroke:#000000;stroke-width:1px;}#export-svg .node-rect-neo{fill:#ffffff;stroke:none;stroke-width:1px;}#export-svg .node polygon{fill:#ffffff;stroke:#000000;stroke-width:1px;}#export-svg #statediagram-barbEnd{fill:#000000;}#export-svg .statediagram-cluster rect{fill:#ffffff;stroke:#000000;stroke-width:1px;}#export-svg .cluster-label,#export-svg .nodeLabel{color:#333;line-height:1.0;}#export-svg .statediagram-cluster rect.outer{rx:3px;ry:3px;}#export-svg .statediagram-state .divider{stroke:#000000;}#export-svg .statediagram-state .title-state{rx:3px;ry:3px;}#export-svg .statediagram-cluster.statediagram-cluster .inner{fill:#ffffff;}#export-svg .statediagram-cluster.statediagram-cluster-alt .inner{fill:#f0f0f0;}#export-svg .statediagram-cluster .inner{rx:0;ry:0;}#export-svg .statediagram-state rect.basic{rx:3px;ry:3px;}#export-svg .state-shadow-neo{filter:drop-shadow( 0px 1px 2px rgba(0, 0, 0, 0.25));}#export-svg .statediagram-state rect.divider{stroke-dasharray:10,10;fill:#f0f0f0;}#export-svg .note-edge{stroke-dasharray:5;}#export-svg .statediagram-note rect{fill:#fff5ad;stroke:hsl(52.6829268293, 60%, 73.9215686275%);stroke-width:1px;rx:0;ry:0;}#export-svg .statediagram-note rect{fill:#fff5ad;stroke:hsl(52.6829268293, 60%, 
73.9215686275%);stroke-width:1px;rx:0;ry:0;}#export-svg .statediagram-note text{fill:#333;}#export-svg .statediagram-note .nodeLabel{color:#333;}#export-svg .node.statediagram-note rect{stroke:hsl(52.6829268293, 60%, 73.9215686275%)!important;}#export-svg .statediagram .edgeLabel{color:red;}#export-svg #dependencyStart,#export-svg #dependencyEnd{fill:#000000;stroke:#000000;stroke-width:1;}#export-svg .statediagramTitleText{text-anchor:middle;font-size:18px;fill:#333;}#export-svg .node .neo-node{stroke:#000000;}#export-svg [data-look="neo"].node rect,#export-svg [data-look="neo"].cluster rect,#export-svg [data-look="neo"].node polygon{stroke:url(#export-svg-gradient);filter:drop-shadow( 0px 1px 2px rgba(0, 0, 0, 0.25));}#export-svg [data-look="neo"].node rect,#export-svg [data-look="neo"].node polygon,#export-svg [data-look="neo"].node path{stroke:url(#export-svg-gradient);filter:drop-shadow( 0px 1px 2px rgba(0, 0, 0, 0.25));}#export-svg [data-look="neo"].node .neo-line path{stroke:hsl(0, 0%, 70%);filter:none;}#export-svg [data-look="neo"].node circle{stroke:url(#export-svg-gradient);filter:drop-shadow( 0px 1px 2px rgba(0, 0, 0, 0.25));}#export-svg [data-look="neo"].node circle .state-start{fill:#000000;}#export-svg [data-look="neo"].statediagram-cluster rect{fill:#ffffff;stroke:url(#export-svg-gradient);stroke-width:1px;}#export-svg [data-look="neo"].icon-shape .icon{fill:url(#export-svg-gradient);filter:drop-shadow( 0px 1px 2px rgba(0, 0, 0, 0.25));}#export-svg [data-look="neo"].icon-shape .icon-neo path{stroke:url(#export-svg-gradient);filter:drop-shadow( 0px 1px 2px rgba(0, 0, 0, 0.25));}#export-svg :root{--mermaid-font-family:"trebuchet ms",verdana,arial,sans-serif;}#export-svg .red>*{fill:red!important;}#export-svg .red span{fill:red!important;}#export-svg .green>*{fill:green!important;}#export-svg .green span{fill:green!important;}#export-svg .yellow>*{fill:yellow!important;}#export-svg .yellow span{fill:yellow!important;}</style><g><defs><marker 
orient="auto" markerUnits="strokeWidth" markerHeight="14" markerWidth="20" refY="7" refX="19" id="export-svg_stateDiagram-barbEnd"><path d="M 19,7 L11,14 L13,7 L11,0 Z"/></marker></defs><g class="root"><g class="clusters"/><g class="edgePaths"><path marker-end="url(#export-svg_stateDiagram-barbEnd)" data-points="W3sieCI6ODMuNTgzMzI4MjQ3MDcwMzEsInkiOjg0LjV9LHsieCI6MTEyLjQ3NDk5ODQ3NDEyMTEsInkiOjg0LjV9LHsieCI6MTQxLjM2NjY2ODcwMTE3MTg4LCJ5Ijo4NC41fV0=" data-id="edge0" data-et="edge" data-edge="true" style=";fill:none" class="edge-thickness-normal edge-pattern-solid transition" id="edge0" d="M83.58332824707031,84.5L112.4749984741211,84.5L126.92083358764648,84.5L141.36666870117188,84.5"/><path marker-end="url(#export-svg_stateDiagram-barbEnd)" data-points="W3sieCI6NjAuOTQ2OTY3Mzg1MDMxOTYsInkiOjY5LjV9LHsieCI6MTEyLjQ3NDk5ODQ3NDEyMTEsInkiOjE4LjV9LHsieCI6MTc0LjQ4MzMzNzQwMjM0Mzc1LCJ5IjoxOC41fSx7IngiOjIzNi40OTE2NzYzMzA1NjY0LCJ5IjoxOC41fSx7IngiOjI5NC4wMjc2NjMxNDQxOTgzNiwieSI6NjkuNX1d" data-id="edge1" data-et="edge" data-edge="true" style=";fill:none" class="edge-thickness-normal edge-pattern-solid transition" id="edge1" d="M60.94696738503196,69.5L103.13062351459659,27.748618914078016Q112.4749984741211,18.5 125.62240488002602,18.5L174.48333740234375,18.5L222.39646232476852,18.5Q236.4916763305664,18.5 247.03958697655486,27.84968656552104L265.2596697373824,44L294.02766314419836,69.5"/><path marker-end="url(#export-svg_stateDiagram-barbEnd)" data-points="W3sieCI6MjA3LjYwMDAwNjEwMzUxNTYyLCJ5Ijo3My41NTE2MDYyODIxODI5NX0seyJ4IjoyMzYuNDkxNjc2MzMwNTY2NCwieSI6NjR9LHsieCI6MjY1LjM4MzM0NjU1NzYxNzIsInkiOjcxLjk1NDUwNTQ2NTEzOTYyfV0=" data-id="edge2" data-et="edge" data-edge="true" style=";fill:none" class="edge-thickness-normal edge-pattern-solid transition" id="edge2" d="M207.60000610351562,73.55160628218295L229.3786410670248,66.35157440794201Q236.4916763305664,64 243.7145938873291,65.9886263662849L250.9375114440918,67.97725273256981L265.3833465576172,71.95450546513962"/><path 
marker-end="url(#export-svg_stateDiagram-barbEnd)" data-points="W3sieCI6MjY1LjM4MzM0NjU1NzYxNzIsInkiOjk3LjA0NTQ5NDUzNDg2MDM4fSx7IngiOjIzNi40OTE2NzYzMzA1NjY0LCJ5IjoxMDV9LHsieCI6MjA3LjYwMDAwNjEwMzUxNTYyLCJ5Ijo5NS40NDgzOTM3MTc4MTcwNX1d" data-id="edge3" data-et="edge" data-edge="true" style=";fill:none" class="edge-thickness-normal edge-pattern-solid transition" id="edge3" d="M265.3833465576172,97.04549453486038L243.8261736441387,102.98065329881007Q236.4916763305664,105 229.2687587738037,102.61209842945426L222.04584121704102,100.22419685890853L207.60000610351562,95.44839371781705"/><path marker-end="url(#export-svg_stateDiagram-barbEnd)" data-points="W3sieCI6Mjk0LjAyNzY2MzE0NDE5ODM2LCJ5Ijo5OS41fSx7IngiOjIzNi40OTE2NzYzMzA1NjY0LCJ5IjoxNTAuNX0seyJ4IjoxNzQuNDgzMzM3NDAyMzQzNzUsInkiOjE1MC41fSx7IngiOjExMi40NzQ5OTg0NzQxMjExLCJ5IjoxNTAuNX0seyJ4Ijo2MC45NDY5NjczODUwMzE5NiwieSI6OTkuNX1d" data-id="edge4" data-et="edge" data-edge="true" style=";fill:none" class="edge-thickness-normal edge-pattern-solid transition" id="edge4" d="M294.02766314419836,99.5L247.03958697655483,141.15031343447896Q236.4916763305664,150.5 222.39646232476852,150.5L174.48333740234375,150.5L125.62240488002602,150.5Q112.4749984741211,150.5 103.13062351459659,141.251381085922L86.71098292957653,125L60.94696738503196,99.5"/></g><g class="edgeLabels"><g transform="translate(112.4749984741211, 84.5)" class="edgeLabel"><g transform="translate(-3.8916702270507812, -10.5)" data-id="edge0" class="label"><foreignObject height="21" width="7.7833404541015625"><div class="labelBkg" xmlns="http://www.w3.org/1999/xhtml" style="display: table-cell; white-space: normal; line-height: 1.5; max-width: 200px; text-align: center;"><span class="edgeLabel"><p>1</p></span></div></foreignObject></g></g><g transform="translate(174.48333740234375, 18.5)" class="edgeLabel"><g transform="translate(-3.8916702270507812, -10.5)" data-id="edge1" class="label"><foreignObject height="21" width="7.7833404541015625"><div class="labelBkg" 
xmlns="http://www.w3.org/1999/xhtml" style="display: table-cell; white-space: normal; line-height: 1.5; max-width: 200px; text-align: center;"><span class="edgeLabel"><p>2</p></span></div></foreignObject></g></g><g transform="translate(236.4916763305664, 64)" class="edgeLabel"><g transform="translate(-3.8916702270507812, -10.5)" data-id="edge2" class="label"><foreignObject height="21" width="7.7833404541015625"><div class="labelBkg" xmlns="http://www.w3.org/1999/xhtml" style="display: table-cell; white-space: normal; line-height: 1.5; max-width: 200px; text-align: center;"><span class="edgeLabel"><p>3</p></span></div></foreignObject></g></g><g transform="translate(236.4916763305664, 105)" class="edgeLabel"><g transform="translate(-3.8916702270507812, -10.5)" data-id="edge3" class="label"><foreignObject height="21" width="7.7833404541015625"><div class="labelBkg" xmlns="http://www.w3.org/1999/xhtml" style="display: table-cell; white-space: normal; line-height: 1.5; max-width: 200px; text-align: center;"><span class="edgeLabel"><p>4</p></span></div></foreignObject></g></g><g transform="translate(174.48333740234375, 150.5)" class="edgeLabel"><g transform="translate(-3.8916702270507812, -10.5)" data-id="edge4" class="label"><foreignObject height="21" width="7.7833404541015625"><div class="labelBkg" xmlns="http://www.w3.org/1999/xhtml" style="display: table-cell; white-space: normal; line-height: 1.5; max-width: 200px; text-align: center;"><span class="edgeLabel"><p>5</p></span></div></foreignObject></g></g></g><g class="nodes"><g transform="translate(45.791664123535156, 84.5)" data-look="neo" data-et="node" data-node="true" data-id="Closed" id="state-Closed-4" class="node green statediagram-state"><rect stroke="url(#gradient)" height="30" width="75.58332824707031" y="-15" x="-37.791664123535156" ry="3" data-id="Closed" rx="3" style="fill:green !important" class="basic label-container"/><g transform="translate(-21.791664123535156, -7)" style="" 
class="label"><rect/><foreignObject height="14" width="43.58332824707031"><div xmlns="http://www.w3.org/1999/xhtml" style="display: table-cell; white-space: normal; line-height: 1.5; max-width: 200px; text-align: center;"><span class="nodeLabel"><p>Closed</p></span></div></foreignObject></g></g><g transform="translate(174.48333740234375, 84.5)" data-look="neo" data-et="node" data-node="true" data-id="Open" id="state-Open-3" class="node red statediagram-state"><rect stroke="url(#gradient)" height="30" width="66.23333740234375" y="-15" x="-33.116668701171875" ry="3" data-id="Open" rx="3" style="fill:red !important" class="basic label-container"/><g transform="translate(-17.116668701171875, -7)" style="" class="label"><rect/><foreignObject height="14" width="34.23333740234375"><div xmlns="http://www.w3.org/1999/xhtml" style="display: table-cell; white-space: normal; line-height: 1.5; max-width: 200px; text-align: center;"><span class="nodeLabel"><p>Open</p></span></div></foreignObject></g></g><g transform="translate(310.95001220703125, 84.5)" data-look="neo" data-et="node" data-node="true" data-id="HalfOpen" id="state-HalfOpen-4" class="node yellow statediagram-state"><rect stroke="url(#gradient)" height="30" width="91.13333129882812" y="-15" x="-45.56666564941406" ry="3" data-id="HalfOpen" rx="3" style="fill:yellow !important" class="basic label-container"/><g transform="translate(-29.566665649414062, -7)" style="" class="label"><rect/><foreignObject height="14" width="59.133331298828125"><div xmlns="http://www.w3.org/1999/xhtml" style="display: table-cell; white-space: normal; line-height: 1.5; max-width: 200px; text-align: center;"><span class="nodeLabel"><p>HalfOpen</p></span></div></foreignObject></g></g></g></g></g><linearGradient y2="0%" x2="100%" y1="0%" x1="0%" gradientUnits="objectBoundingBox" id="export-svg-gradient"><stop stop-opacity="1" stop-color="#0042eb" offset="0%"/><stop stop-opacity="1" stop-color="#eb0042" offset="100%"/></linearGradient></svg> \ 
No newline at end of file diff --git a/doc/index.md b/doc/index.md index f19bc009..dcb51002 100644 --- a/doc/index.md +++ b/doc/index.md @@ -74,6 +74,7 @@ In addition to this documentation, ScalaDocs can be browsed at [https://javadoc. utils/resources utils/control-flow utils/actors + utils/circuit-breaker utils/utility .. toctree:: diff --git a/doc/utils/circuit-breaker.md b/doc/utils/circuit-breaker.md index 67db0ba7..6f5f7c0b 100644 --- a/doc/utils/circuit-breaker.md +++ b/doc/utils/circuit-breaker.md @@ -42,16 +42,40 @@ Those metrics are considered only when number of recorder calls is greater or eq ### Parameters -- `failureRateThreshold: Int = 50` - percentage of recorder calls marked as failed required to switch to open state -- `slowCallThreshold: Int = 50` - percentage of recorder calls marked as slow required to switch to open state -- `slowCallDurationThreshold: FiniteDuration = 60.seconds` - duration that call has to exceed to be marked as slow -- `slidingWindow: SlidingWindow = SlidingWindow.CountBased(100)` - mechanism to determine how many calls are recorded -- `minimumNumberOfCalls: Int = 20` - minium number of calls recored for breaker to be able to swtich to open state based on thresholds -- `waitDurationOpenState: FiniteDuration = FiniteDuration(10, TimeUnit.SECONDS)` - duration that CircuitBreaker will wait before switching from `Open` state to `HalfOpen` -- `halfOpenTimeoutDuration: FiniteDuration = FiniteDuration(0, TimeUnit.MILLISECONDS)` - timeout for `HalfOpen` state after which, if not enough calls were recorder, breaker will go back to `Open` state -- `numberOfCallsInHalfOpenState: Int = 10` - number of calls recorded in `HalfOpen` state needed to calculate metrics to decide if breaker should go back to `Open` state or `Closed` +- `failureRateThreshold: PercentageThreshold` - percentage of recorder calls marked as failed required to switch to open state +- `slowCallThreshold: PercentageThreshold` - percentage of recorder calls marked as 
slow required to switch to open state +- `slowCallDurationThreshold: FiniteDuration` - duration that call has to exceed to be marked as slow +- `slidingWindow: SlidingWindow` - mechanism to determine how many calls are recorded +- `minimumNumberOfCalls: Int` - minium number of calls recored for breaker to be able to swtich to open state based on thresholds +- `waitDurationOpenState: FiniteDuration` - duration that CircuitBreaker will wait before switching from `Open` state to `HalfOpen` +- `halfOpenTimeoutDuration: FiniteDuration` - timeout for `HalfOpen` state after which, if not enough calls were recorder, breaker will go back to `Open` state +- `numberOfCallsInHalfOpenState: Int` - number of calls recorded in `HalfOpen` state needed to calculate metrics to decide if breaker should go back to `Open` state or `Closed` + +Values defined in `CircuitBreakerConfig.default`: +``` +failureRateThreshold = PercentageThreshold(50) +slowCallThreshold = PercentageThreshold(50) +slowCallDurationThreshold = 60.seconds +slidingWindow = SlidingWindow.CountBased(100) +minimumNumberOfCalls = 20 +waitDurationOpenState = FiniteDuration(10, TimeUnit.SECONDS) +halfOpenTimeoutDuration = FiniteDuration(0, TimeUnit.MILLISECONDS) +numberOfCallsInHalfOpenState = 10 +``` + +## Conditions for state change + + + -## Examples +1. State changes from `Closed` to `Open` after any threshold was exceeded (failureThreshold or slowThreshold) and number of recorder calls is equal or greater than minimumNumberOfCalls. +2. State changes from `Closed` to `HalfOpen` if any threshold was exceeded, number of recorder calls is equal or greater than `minimumNumberOfCalls` and `waitDurationOpenState` is zero. +3. State changes from `Open` to `HalfOpen` when `waitDurationOpenState` passes. +4. State changes from `HalfOpen` to `Open` if `halfOpenTimeoutDuration` passes without enough calls recorded or number of recorder calls is equal to `numberOfCallsInHalfOpenState` and any threshold was exceeded. +5. 
State changes from `HalfOpen` to `Closed` if `numberOfCallsInHalfOpenState` where completed before timeout and there wasn't any threshold exceeded. + + +## Examples ```scala mdoc:compile-only import ox.UnionMode @@ -65,14 +89,14 @@ def unionOperation: String | Int = ??? supervised: val ciruictBreaker = CircuitBreaker(CircuitBreakerConfig.default) - + // various operation definitions ciruictBreaker.runOrDrop(directOperation) ciruictBreaker.runOrDropEither(eitherOperation) - + // custom error mode ciruictBreaker.runOrDropWithErrorMode(UnionMode[String])(unionOperation) - + // retry with circuit breaker inside retry(RetryConfig.backoff(3, 100.millis)){ ciruictBreaker.runOrDrop(directOperation).get From 4e01101f47798acc87d6eee73fed5742b2192de8 Mon Sep 17 00:00:00 2001 From: Kamil-Lontkowski <kamillont14@gmail.com> Date: Thu, 23 Jan 2025 10:00:02 +0100 Subject: [PATCH 23/26] docs grammar fixes, better working example --- doc/utils/circuit-breaker.md | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/doc/utils/circuit-breaker.md b/doc/utils/circuit-breaker.md index 6f5f7c0b..4a49731f 100644 --- a/doc/utils/circuit-breaker.md +++ b/doc/utils/circuit-breaker.md @@ -3,7 +3,7 @@ The circuit breaker allows controlling execution of operations and stops if certain condition are met. CircuitBreaker is thread-safe and uses [actor](./actors.md) underneath to change breaker state. ```{note} -Since actor executes on one thread which may be bottleneck. That means that calculating state change can be deleyad and breaker can let few more operations to complete before openning. +Since actor executes on one thread this may be bottleneck. That means that calculating state change can be deleyad and breaker can let few more operations to complete before openning. This can be the case with many very fast operations. ``` @@ -36,22 +36,28 @@ There are two ways that metrics are calculated. 
The state of the CircuitBreaker changes from `Closed` to `Open` when the `failureRate` is greater or equal to configurable threshold. For example when 80% of recorded call results failed. Failures are counted based on provided `ErrorMode`. -The same state change also happen when percentage of slow calls (exceeding `slowCallDurationThreshold`) is equal or greater than configured threshold. For exmaple 80% of calls took longer then 10 seconds. +The same state change also happen when percentage of slow calls (exceeding `slowCallDurationThreshold`) is equal or greater than configured threshold. For examaple 80% of calls took longer then 10 seconds. Those metrics are considered only when number of recorder calls is greater or equal to `minimumNumberOfCalls`, otherwise we don't change state even if `failureRate` is 100%. ### Parameters -- `failureRateThreshold: PercentageThreshold` - percentage of recorder calls marked as failed required to switch to open state -- `slowCallThreshold: PercentageThreshold` - percentage of recorder calls marked as slow required to switch to open state -- `slowCallDurationThreshold: FiniteDuration` - duration that call has to exceed to be marked as slow -- `slidingWindow: SlidingWindow` - mechanism to determine how many calls are recorded -- `minimumNumberOfCalls: Int` - minium number of calls recored for breaker to be able to swtich to open state based on thresholds -- `waitDurationOpenState: FiniteDuration` - duration that CircuitBreaker will wait before switching from `Open` state to `HalfOpen` -- `halfOpenTimeoutDuration: FiniteDuration` - timeout for `HalfOpen` state after which, if not enough calls were recorder, breaker will go back to `Open` state -- `numberOfCallsInHalfOpenState: Int` - number of calls recorded in `HalfOpen` state needed to calculate metrics to decide if breaker should go back to `Open` state or `Closed` +- `failureRateThreshold: PercentageThreshold` - percentage of recorder calls marked as failed required to switch 
to open state. +- `slowCallThreshold: PercentageThreshold` - percentage of recorder calls marked as slow required to switch to open state. +- `slowCallDurationThreshold: FiniteDuration` - duration that call has to exceed to be marked as slow. +- `slidingWindow: SlidingWindow` - mechanism to determine how calls are recorded. +- `minimumNumberOfCalls: Int` - minium number of calls recorded needed for breaker to be able to swtich to open state based on thresholds. +- `waitDurationOpenState: FiniteDuration` - duration that CircuitBreaker will wait before switching from `Open` state to `HalfOpen`. +- `halfOpenTimeoutDuration: FiniteDuration` - timeout for `HalfOpen` state after which, if not enough calls were recorder, breaker will go back to `Open` state. Zero means there is no timeout. +- `numberOfCallsInHalfOpenState: Int` - number of calls recorded in `HalfOpen` state needed to calculate metrics to decide if breaker should go back to `Open` state or `Closed`. It is also maximum number of operations that can be started in this state. + +`SlidingWindow` variants: + +- `CountBased(windowSize: Int)` - This variant calculates metrics based on last n results of calls recorded. These statistics are cleared on every state change. +- `TimeBased(duration: FiniteDuration)` - This variant calculates metrics of operations in the lapse of `duraiton` before current time. These statistics are cleared on every state change. Values defined in `CircuitBreakerConfig.default`: + ``` failureRateThreshold = PercentageThreshold(50) slowCallThreshold = PercentageThreshold(50) @@ -67,14 +73,12 @@ numberOfCallsInHalfOpenState = 10  - -1. State changes from `Closed` to `Open` after any threshold was exceeded (failureThreshold or slowThreshold) and number of recorder calls is equal or greater than minimumNumberOfCalls. +1. 
State changes from `Closed` to `Open` after any threshold was exceeded (`failureThreshold` or `slowThreshold`) and number of recorder calls is equal or greater than `minimumNumberOfCalls`. 2. State changes from `Closed` to `HalfOpen` if any threshold was exceeded, number of recorder calls is equal or greater than `minimumNumberOfCalls` and `waitDurationOpenState` is zero. 3. State changes from `Open` to `HalfOpen` when `waitDurationOpenState` passes. 4. State changes from `HalfOpen` to `Open` if `halfOpenTimeoutDuration` passes without enough calls recorded or number of recorder calls is equal to `numberOfCallsInHalfOpenState` and any threshold was exceeded. 5. State changes from `HalfOpen` to `Closed` if `numberOfCallsInHalfOpenState` where completed before timeout and there wasn't any threshold exceeded. - ## Examples ```scala mdoc:compile-only @@ -98,7 +102,9 @@ supervised: ciruictBreaker.runOrDropWithErrorMode(UnionMode[String])(unionOperation) // retry with circuit breaker inside - retry(RetryConfig.backoff(3, 100.millis)){ - ciruictBreaker.runOrDrop(directOperation).get + retryEither(RetryConfig.backoff(3, 100.millis)){ + ciruictBreaker.runOrDrop(directOperation) match + case Some(value) => Right(value) + case None => Left("Operation dropped") } ``` From dc6d93eb36053a96986d792b90af56dbd1c366c1 Mon Sep 17 00:00:00 2001 From: Kamil-Lontkowski <kamillont14@gmail.com> Date: Mon, 27 Jan 2025 13:47:14 +0100 Subject: [PATCH 24/26] minor fixes, spell check on docs --- .../scala/ox/resilience/CircuitBreaker.scala | 18 ++++- .../CircuitBreakerStateMachine.scala | 74 +++++-------------- doc/utils/circuit-breaker.md | 34 +++++---- 3 files changed, 53 insertions(+), 73 deletions(-) diff --git a/core/src/main/scala/ox/resilience/CircuitBreaker.scala b/core/src/main/scala/ox/resilience/CircuitBreaker.scala index c430dd31..1f270166 100644 --- a/core/src/main/scala/ox/resilience/CircuitBreaker.scala +++ b/core/src/main/scala/ox/resilience/CircuitBreaker.scala @@ -12,6 +12,13 
@@ private[resilience] enum CircuitBreakerState: case Open(since: Long) case Closed(since: Long) case HalfOpen(since: Long, semaphore: Semaphore, completedOperations: Int = 0) + def isSameState(other: CircuitBreakerState): Boolean = + (this, other) match + case (Open(sinceOpen), Open(since)) if since == sinceOpen => true + case (HalfOpen(sinceHalfOpen, _, _), HalfOpen(since, _, _)) if sinceHalfOpen == since => true + case (Closed(sinceClosed), Closed(since)) if sinceClosed == since => true + case _ => false +end CircuitBreakerState private[resilience] enum CircuitBreakerResult: case Success @@ -53,11 +60,11 @@ end CircuitBreakerStateMachineConfig /** Circuit Breaker. Operations can be dropped, when the breaker is open or if it doesn't take more operation in halfOpen state. The Circuit * Breaker might calculate different metrics based on [[SlidingWindow]] provided in config. See [[SlidingWindow]] for more details. */ -case class CircuitBreaker(config: CircuitBreakerConfig)(using Ox): +case class CircuitBreaker(config: CircuitBreakerConfig)(using ox: Ox, bufferCapacity: BufferCapacity): private[resilience] val stateMachine = CircuitBreakerStateMachine(config) - private val actorRef: ActorRef[CircuitBreakerStateMachine] = Actor.create(stateMachine)(using sc = BufferCapacity.apply(100)) + private val actorRef: ActorRef[CircuitBreakerStateMachine] = Actor.create(stateMachine) - private def tryAcquire: AcquireResult = stateMachine.state match + private def tryAcquire(): AcquireResult = stateMachine.state match case currState @ CircuitBreakerState.Closed(_) => AcquireResult(true, currState) case currState @ CircuitBreakerState.Open(_) => AcquireResult(false, currState) case currState @ CircuitBreakerState.HalfOpen(_, semaphore, _) => AcquireResult(semaphore.tryAcquire(1), currState) @@ -73,7 +80,7 @@ case class CircuitBreaker(config: CircuitBreakerConfig)(using Ox): def runOrDropWithErrorMode[E, F[_], T](em: ErrorMode[E, F])( operation: => F[T] ): Option[F[T]] = - val 
acquiredResult = tryAcquire + val acquiredResult = tryAcquire() if acquiredResult.acquired then val (duration, result) = timed(operation) if em.isError(result) then @@ -116,3 +123,6 @@ case class CircuitBreaker(config: CircuitBreakerConfig)(using Ox): def runOrDrop[T](operation: => T): Option[T] = runOrDropEither(Try(operation).toEither).map(_.fold(throw _, identity)) end CircuitBreaker + +object CircuitBreaker: + given default: BufferCapacity = BufferCapacity.apply(100) diff --git a/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala b/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala index faf7e82b..275e5e73 100644 --- a/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala +++ b/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala @@ -15,15 +15,10 @@ private[resilience] case class CircuitBreakerStateMachine( def state: CircuitBreakerState = _state def registerResult(result: CircuitBreakerResult, acquired: AcquireResult, selfRef: ActorRef[CircuitBreakerStateMachine]): Unit = - // We check that result was acquired in the same state that we are currently in - val isResultFromCurrentState = (acquired.circuitState, _state) match - case (CircuitBreakerState.Open(sinceOpen), CircuitBreakerState.Open(since)) if since == sinceOpen => true - case (CircuitBreakerState.HalfOpen(sinceHalfOpen, _, _), CircuitBreakerState.HalfOpen(since, _, _)) if sinceHalfOpen == since => true - case (CircuitBreakerState.Closed(sinceClosed), CircuitBreakerState.Closed(since)) if sinceClosed == since => true - case _ => false // If acquired in different state we don't update results - if isResultFromCurrentState then results.updateResults(result) - updateState(selfRef, Some(acquired)) + if acquired.circuitState.isSameState(_state) then + results.updateResults(result) + updateState(selfRef, Some(acquired)) end registerResult def updateState(selfRef: ActorRef[CircuitBreakerStateMachine], acquiredResult: Option[AcquireResult] = None): 
Unit = @@ -76,27 +71,24 @@ private[resilience] object CircuitBreakerStateMachine: def nextState(metrics: Metrics, currentState: CircuitBreakerState, config: CircuitBreakerStateMachineConfig): CircuitBreakerState = val currentTimestamp = metrics.timestamp - // We want to know if last result should be added to completed calls in halfOpen state - val lastCompletedCall = metrics.lastAcquisitionResult match - case Some(AcquireResult(true, CircuitBreakerState.HalfOpen(s, sem, completed))) => 1 - case _ => 0 val exceededThreshold = (metrics.failureRate >= config.failureRateThreshold || metrics.slowCallsRate >= config.slowCallThreshold) val minCallsRecorder = metrics.operationsInWindow >= config.minimumNumberOfCalls currentState match - case CircuitBreakerState.Closed(since) => + case self @ CircuitBreakerState.Closed(since) => if minCallsRecorder && exceededThreshold then if config.waitDurationOpenState.toMillis == 0 then CircuitBreakerState.HalfOpen(currentTimestamp, Semaphore(config.numberOfCallsInHalfOpenState)) else CircuitBreakerState.Open(currentTimestamp) - else CircuitBreakerState.Closed(since) - case CircuitBreakerState.Open(since) => + else self + case self @ CircuitBreakerState.Open(since) => val timePassed = (currentTimestamp - since) >= config.waitDurationOpenState.toMillis - if timePassed || config.waitDurationOpenState.toMillis == 0 then - CircuitBreakerState.HalfOpen(currentTimestamp, Semaphore(config.numberOfCallsInHalfOpenState)) - else CircuitBreakerState.Open(since) + if timePassed then CircuitBreakerState.HalfOpen(currentTimestamp, Semaphore(config.numberOfCallsInHalfOpenState)) + else self case CircuitBreakerState.HalfOpen(since, semaphore, completedCalls) => - lazy val allCallsInHalfOpenCompleted = (completedCalls + lastCompletedCall) >= config.numberOfCallsInHalfOpenState - lazy val timePassed = (currentTimestamp - since) >= config.halfOpenTimeoutDuration.toMillis + // We want to know if last result should be added to completed calls in halfOpen 
state + val lastCompletedCall = if metrics.lastAcquisitionResult.isDefined then 1 else 0 + val allCallsInHalfOpenCompleted = (completedCalls + lastCompletedCall) >= config.numberOfCallsInHalfOpenState + val timePassed = (currentTimestamp - since) >= config.halfOpenTimeoutDuration.toMillis // if we didn't complete all half open calls but timeout is reached go back to open if !allCallsInHalfOpenCompleted && config.halfOpenTimeoutDuration.toMillis != 0 && timePassed then CircuitBreakerState.Open(currentTimestamp) @@ -105,12 +97,7 @@ private[resilience] object CircuitBreakerStateMachine: // If halfOpen calls completed, but rates are still above go back to open else if allCallsInHalfOpenCompleted && exceededThreshold then CircuitBreakerState.Open(currentTimestamp) // We didn't complete all half open calls, keep halfOpen - else - metrics.lastAcquisitionResult match - case Some(AcquireResult(true, CircuitBreakerState.HalfOpen(s, _, _))) - if s == since => // Check if this is the same HalfOpen state - CircuitBreakerState.HalfOpen(since, semaphore, completedCalls + 1) - case _ => CircuitBreakerState.HalfOpen(since, semaphore, completedCalls) + else CircuitBreakerState.HalfOpen(since, semaphore, completedCalls + lastCompletedCall) end if end match end nextState @@ -124,30 +111,19 @@ private[resilience] sealed trait CircuitBreakerResults(using val ox: Ox): private[resilience] object CircuitBreakerResults: case class CountBased(windowSize: Int)(using ox: Ox) extends CircuitBreakerResults(using ox): - private val results = new collection.mutable.ArrayDeque[CircuitBreakerResult](windowSize) + private val results = new collection.mutable.ArrayDeque[CircuitBreakerResult](windowSize + 1) private var slowCalls = 0 private var failedCalls = 0 private var successCalls = 0 - private def clearResults: Unit = + private def clearResults(): Unit = results.clear() slowCalls = 0 failedCalls = 0 successCalls = 0 def onStateChange(oldState: CircuitBreakerState, newState: CircuitBreakerState): 
Unit = - import CircuitBreakerState.* - // we have to match so we don't reset result when for example incrementing completed calls in halfopen state - (oldState, newState) match - case (Closed(_), Open(_) | HalfOpen(_, _, _)) => - clearResults - case (HalfOpen(_, _, _), Open(_) | Closed(_)) => - clearResults - case (Open(_), Closed(_) | HalfOpen(_, _, _)) => - clearResults - case (_, _) => () - end match - end onStateChange + if !oldState.isSameState(newState) then clearResults() def updateResults(result: CircuitBreakerResult): Unit = result match @@ -178,7 +154,7 @@ private[resilience] object CircuitBreakerResults: end CountBased case class TimeWindowBased(windowDuration: FiniteDuration)(using ox: Ox) extends CircuitBreakerResults(using ox): - // holds timestamp of recored operation and result + // holds timestamp of recorded operation and result private val results = collection.mutable.ArrayDeque[(Long, CircuitBreakerResult)]() private var slowCalls = 0 private var failedCalls = 0 @@ -191,7 +167,7 @@ private[resilience] object CircuitBreakerResults: successCalls = 0 def calculateMetrics(lastAcquisitionResult: Option[AcquireResult], timestamp: Long): Metrics = - // filter all entries that happend outside sliding window + // filter all entries that happened outside sliding window val removed = results.removeHeadWhile((time, _) => timestamp > time + windowDuration.toMillis) removed.foreach { (_, result) => result match @@ -219,17 +195,7 @@ private[resilience] object CircuitBreakerResults: results.addOne((System.currentTimeMillis(), result)) def onStateChange(oldState: CircuitBreakerState, newState: CircuitBreakerState): Unit = - import CircuitBreakerState.* - // we have to match so we don't reset result when for example incrementing completed calls in halfopen state - (oldState, newState) match - case (Closed(_), Open(_) | HalfOpen(_, _, _)) => - clearResults() - case (HalfOpen(_, _, _), Open(_) | Closed(_)) => - clearResults() - case (Open(_), Closed(_) | 
HalfOpen(_, _, _)) => - clearResults() - case (_, _) => () - end match - end onStateChange + if !oldState.isSameState(newState) then clearResults() + end TimeWindowBased end CircuitBreakerResults diff --git a/doc/utils/circuit-breaker.md b/doc/utils/circuit-breaker.md index 4a49731f..0a342c9b 100644 --- a/doc/utils/circuit-breaker.md +++ b/doc/utils/circuit-breaker.md @@ -1,11 +1,6 @@ # Circuit Breaker -The circuit breaker allows controlling execution of operations and stops if certain condition are met. CircuitBreaker is thread-safe and uses [actor](./actors.md) underneath to change breaker state. - -```{note} -Since actor executes on one thread this may be bottleneck. That means that calculating state change can be deleyad and breaker can let few more operations to complete before openning. -This can be the case with many very fast operations. -``` +The circuit breaker allows controlling execution of operations and stops if certain condition are met. CircuitBreaker is thread-safe and can be used in concurrent scenarios. ## API @@ -24,6 +19,9 @@ supervised: ## Configuration +Many config parameters relate to calculated metrics. Those metrics are percentage of calls that failed and percentage of calls that exceeded `slowCallDurationThreshold`. +Which calls are included during calculation of these metrics are determined by `SlidingWindow` configuration. + ### Sliding window There are two ways that metrics are calculated. @@ -34,19 +32,19 @@ There are two ways that metrics are calculated. ### Failure rate and slow call rate thresholds The state of the CircuitBreaker changes from `Closed` to `Open` when the `failureRate` is greater or equal to configurable threshold. For example when 80% of recorded call results failed. -Failures are counted based on provided `ErrorMode`. +Failures are counted based on provided `ErrorMode`. 
For example any exception that is thrown by the operation, when using the direct, "unwrapped" API or any `Left` variant when using `runOrDropEither`. -The same state change also happen when percentage of slow calls (exceeding `slowCallDurationThreshold`) is equal or greater than configured threshold. For examaple 80% of calls took longer then 10 seconds. +The same state change also happen when percentage of slow calls (exceeding configurable `slowCallDurationThreshold`) is equal or greater than configured threshold. For example 80% of calls took longer then 10 seconds. Those metrics are considered only when number of recorder calls is greater or equal to `minimumNumberOfCalls`, otherwise we don't change state even if `failureRate` is 100%. ### Parameters -- `failureRateThreshold: PercentageThreshold` - percentage of recorder calls marked as failed required to switch to open state. +- `failureRateThreshold: PercentageThreshold` - percentage of recorded calls marked as failed required to switch to open state. - `slowCallThreshold: PercentageThreshold` - percentage of recorder calls marked as slow required to switch to open state. - `slowCallDurationThreshold: FiniteDuration` - duration that call has to exceed to be marked as slow. - `slidingWindow: SlidingWindow` - mechanism to determine how calls are recorded. -- `minimumNumberOfCalls: Int` - minium number of calls recorded needed for breaker to be able to swtich to open state based on thresholds. +- `minimumNumberOfCalls: Int` - minimum number of calls recorded needed for breaker to be able to switch to open state based on thresholds. - `waitDurationOpenState: FiniteDuration` - duration that CircuitBreaker will wait before switching from `Open` state to `HalfOpen`. - `halfOpenTimeoutDuration: FiniteDuration` - timeout for `HalfOpen` state after which, if not enough calls were recorder, breaker will go back to `Open` state. Zero means there is no timeout. 
- `numberOfCallsInHalfOpenState: Int` - number of calls recorded in `HalfOpen` state needed to calculate metrics to decide if breaker should go back to `Open` state or `Closed`. It is also maximum number of operations that can be started in this state. @@ -79,6 +77,12 @@ numberOfCallsInHalfOpenState = 10 4. State changes from `HalfOpen` to `Open` if `halfOpenTimeoutDuration` passes without enough calls recorded or number of recorder calls is equal to `numberOfCallsInHalfOpenState` and any threshold was exceeded. 5. State changes from `HalfOpen` to `Closed` if `numberOfCallsInHalfOpenState` where completed before timeout and there wasn't any threshold exceeded. + +```{note} +CircuitBreaker uses actor internally and since actor executes on one thread this may be bottleneck. That means that calculating state change can be deleyad and breaker can let few more operations to complete before openning. +This can be the case with many very fast operations. +``` + ## Examples ```scala mdoc:compile-only @@ -92,18 +96,18 @@ def eitherOperation: Either[String, Int] = ??? def unionOperation: String | Int = ??? 
supervised: - val ciruictBreaker = CircuitBreaker(CircuitBreakerConfig.default) + val circuitBreaker = CircuitBreaker(CircuitBreakerConfig.default) // various operation definitions - ciruictBreaker.runOrDrop(directOperation) - ciruictBreaker.runOrDropEither(eitherOperation) + circuitBreaker.runOrDrop(directOperation) + circuitBreaker.runOrDropEither(eitherOperation) // custom error mode - ciruictBreaker.runOrDropWithErrorMode(UnionMode[String])(unionOperation) + circuitBreaker.runOrDropWithErrorMode(UnionMode[String])(unionOperation) // retry with circuit breaker inside retryEither(RetryConfig.backoff(3, 100.millis)){ - ciruictBreaker.runOrDrop(directOperation) match + circuitBreaker.runOrDrop(directOperation) match case Some(value) => Right(value) case None => Left("Operation dropped") } From 51837686506f98351b0f8892e60f0183ac2d0382 Mon Sep 17 00:00:00 2001 From: Kamil-Lontkowski <kamillont14@gmail.com> Date: Mon, 27 Jan 2025 14:59:43 +0100 Subject: [PATCH 25/26] more spell corrections, docs --- .../scala/ox/resilience/CircuitBreaker.scala | 8 +-- .../ox/resilience/CircuitBreakerConfig.scala | 17 +++--- .../CircuitBreakerStateMachine.scala | 18 ++++--- .../CircuitBreakerStateMachineTest.scala | 18 +++---- .../ox/resilience/CircuitBreakerTest.scala | 6 +-- doc/utils/circuit-breaker.md | 53 ++++++++++++++++--- 6 files changed, 82 insertions(+), 38 deletions(-) diff --git a/core/src/main/scala/ox/resilience/CircuitBreaker.scala b/core/src/main/scala/ox/resilience/CircuitBreaker.scala index 1f270166..d1276688 100644 --- a/core/src/main/scala/ox/resilience/CircuitBreaker.scala +++ b/core/src/main/scala/ox/resilience/CircuitBreaker.scala @@ -36,8 +36,8 @@ private[resilience] case class Metrics( private[resilience] case class AcquireResult(acquired: Boolean, circuitState: CircuitBreakerState) private case class CircuitBreakerStateMachineConfig( - failureRateThreshold: Int, - slowCallThreshold: Int, + failureRateThreshold: PercentageThreshold, + slowCallThreshold: 
PercentageThreshold, slowCallDurationThreshold: FiniteDuration, minimumNumberOfCalls: Int, numberOfCallsInHalfOpenState: Int, @@ -47,8 +47,8 @@ private case class CircuitBreakerStateMachineConfig( private object CircuitBreakerStateMachineConfig: def fromConfig(c: CircuitBreakerConfig): CircuitBreakerStateMachineConfig = CircuitBreakerStateMachineConfig( - failureRateThreshold = c.failureRateThreshold.toInt, - slowCallThreshold = c.slowCallThreshold.toInt, + failureRateThreshold = c.failureRateThreshold, + slowCallThreshold = c.slowCallThreshold, slowCallDurationThreshold = c.slowCallDurationThreshold, minimumNumberOfCalls = c.minimumNumberOfCalls, numberOfCallsInHalfOpenState = c.numberOfCallsInHalfOpenState, diff --git a/core/src/main/scala/ox/resilience/CircuitBreakerConfig.scala b/core/src/main/scala/ox/resilience/CircuitBreakerConfig.scala index ce197b18..d6b9efd5 100644 --- a/core/src/main/scala/ox/resilience/CircuitBreakerConfig.scala +++ b/core/src/main/scala/ox/resilience/CircuitBreakerConfig.scala @@ -1,20 +1,19 @@ package ox.resilience import scala.concurrent.duration.* -import java.util.concurrent.TimeUnit /** Allows to configure how [[Metrics]] will be calculated */ enum SlidingWindow: /** Window counting last n operations when calculating metrics. * @param windowSize - * number of last n results recored. + * number of last n results recorded. */ case CountBased(windowSize: Int) - /** Window counting operations in the lapse of `duraiton` before current time. + /** Window counting operations in the lapse of `duration` before current time. * @param duration - * span of time where results are considered for including in metrics. + * span of time in which results are included in metrics. 
*/ case TimeBased(duration: FiniteDuration) end SlidingWindow @@ -22,7 +21,9 @@ end SlidingWindow /** Type representing percentage threshold between 0 and 100 */ opaque type PercentageThreshold = Int -extension (c: PercentageThreshold) def toInt: Int = c +extension (c: PercentageThreshold) + def toInt: Int = c + def isExceeded(by: Int): Boolean = by >= c object PercentageThreshold: def apply(c: Int): PercentageThreshold = @@ -68,11 +69,11 @@ object CircuitBreakerConfig: def default: CircuitBreakerConfig = CircuitBreakerConfig( failureRateThreshold = PercentageThreshold(50), slowCallThreshold = PercentageThreshold(50), - slowCallDurationThreshold = 60.seconds, + slowCallDurationThreshold = 10.seconds, slidingWindow = SlidingWindow.CountBased(100), minimumNumberOfCalls = 20, - waitDurationOpenState = FiniteDuration(10, TimeUnit.SECONDS), - halfOpenTimeoutDuration = FiniteDuration(0, TimeUnit.MILLISECONDS), + waitDurationOpenState = 10.seconds, + halfOpenTimeoutDuration = 0.millis, numberOfCallsInHalfOpenState = 10 ) end CircuitBreakerConfig diff --git a/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala b/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala index 275e5e73..e96eb470 100644 --- a/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala +++ b/core/src/main/scala/ox/resilience/CircuitBreakerStateMachine.scala @@ -39,7 +39,7 @@ private[resilience] case class CircuitBreakerStateMachine( updateAfter(config.waitDurationOpenState, selfRef) case ( CircuitBreakerState.Open(_) | CircuitBreakerState.Closed(_), - CircuitBreakerState.HalfOpen(since, semaphore, completedOperations) + CircuitBreakerState.HalfOpen(_, _, _) ) => // schedule timeout for halfOpen state if is not 0 if config.halfOpenTimeoutDuration.toMillis != 0 then updateAfter(config.halfOpenTimeoutDuration, selfRef) @@ -71,10 +71,11 @@ private[resilience] object CircuitBreakerStateMachine: def nextState(metrics: Metrics, currentState: CircuitBreakerState, config: 
CircuitBreakerStateMachineConfig): CircuitBreakerState = val currentTimestamp = metrics.timestamp - val exceededThreshold = (metrics.failureRate >= config.failureRateThreshold || metrics.slowCallsRate >= config.slowCallThreshold) + val exceededThreshold = + config.failureRateThreshold.isExceeded(metrics.failureRate) || config.slowCallThreshold.isExceeded(metrics.slowCallsRate) val minCallsRecorder = metrics.operationsInWindow >= config.minimumNumberOfCalls currentState match - case self @ CircuitBreakerState.Closed(since) => + case self @ CircuitBreakerState.Closed(_) => if minCallsRecorder && exceededThreshold then if config.waitDurationOpenState.toMillis == 0 then CircuitBreakerState.HalfOpen(currentTimestamp, Semaphore(config.numberOfCallsInHalfOpenState)) @@ -110,6 +111,9 @@ private[resilience] sealed trait CircuitBreakerResults(using val ox: Ox): def calculateMetrics(lastAcquisitionResult: Option[AcquireResult], timestamp: Long): Metrics private[resilience] object CircuitBreakerResults: + private object Percentage: + def of(observed: Int, size: Int): Int = ((observed / size.toFloat) * 100).toInt + case class CountBased(windowSize: Int)(using ox: Ox) extends CircuitBreakerResults(using ox): private val results = new collection.mutable.ArrayDeque[CircuitBreakerResult](windowSize + 1) private var slowCalls = 0 @@ -141,8 +145,8 @@ private[resilience] object CircuitBreakerResults: def calculateMetrics(lastAcquisitionResult: Option[AcquireResult], timestamp: Long): Metrics = val numOfOperations = results.length - val failuresRate = ((failedCalls / numOfOperations.toFloat) * 100).toInt - val slowRate = ((slowCalls / numOfOperations.toFloat) * 100).toInt + val failuresRate = Percentage.of(failedCalls, numOfOperations) + val slowRate = Percentage.of(slowCalls, numOfOperations) Metrics( failuresRate, slowRate, @@ -176,8 +180,8 @@ private[resilience] object CircuitBreakerResults: case CircuitBreakerResult.Slow => slowCalls -= 1 } val numOfOperations = results.length - 
val failuresRate = ((failedCalls / numOfOperations.toFloat) * 100).toInt - val slowRate = ((slowCalls / numOfOperations.toFloat) * 100).toInt + val failuresRate = Percentage.of(failedCalls, numOfOperations) + val slowRate = Percentage.of(slowCalls, numOfOperations) Metrics( failuresRate, slowRate, diff --git a/core/src/test/scala/ox/resilience/CircuitBreakerStateMachineTest.scala b/core/src/test/scala/ox/resilience/CircuitBreakerStateMachineTest.scala index ef87d83e..ced934a4 100644 --- a/core/src/test/scala/ox/resilience/CircuitBreakerStateMachineTest.scala +++ b/core/src/test/scala/ox/resilience/CircuitBreakerStateMachineTest.scala @@ -15,33 +15,33 @@ class CircuitBreakerStateMachineTest extends AnyFlatSpec with Matchers: // given val config = defaultConfig val stateMachine = CircuitBreakerStateMachine(config) - val currentTimstamp = System.currentTimeMillis() + val currentTimestamp = System.currentTimeMillis() val lastResult: Option[AcquireResult] = None val metrics = - Metrics(hundredPercentSuccessRate, hundredPercentSuccessRate, config.minimumNumberOfCalls, lastResult, currentTimstamp) + Metrics(hundredPercentSuccessRate, hundredPercentSuccessRate, config.minimumNumberOfCalls, lastResult, currentTimestamp) // when - val resultingState = CircuitBreakerStateMachine.nextState(metrics, CircuitBreakerState.Closed(currentTimstamp), stateMachine.config) + val resultingState = CircuitBreakerStateMachine.nextState(metrics, CircuitBreakerState.Closed(currentTimestamp), stateMachine.config) resultingState shouldBe a[CircuitBreakerState.Closed] } - it should "go to open after surpasing failure threshold" in supervised { + it should "go to open after surpassing failure threshold" in supervised { // given val config = defaultConfig val stateMachine = CircuitBreakerStateMachine(config) - val currentTimstamp = System.currentTimeMillis() + val currentTimestamp = System.currentTimeMillis() val lastResult: Option[AcquireResult] = None - val metrics = Metrics(badFailureRate, 
hundredPercentSuccessRate, config.minimumNumberOfCalls, lastResult, currentTimstamp) + val metrics = Metrics(badFailureRate, hundredPercentSuccessRate, config.minimumNumberOfCalls, lastResult, currentTimestamp) // when - val resultingState = CircuitBreakerStateMachine.nextState(metrics, CircuitBreakerState.Closed(currentTimstamp), stateMachine.config) + val resultingState = CircuitBreakerStateMachine.nextState(metrics, CircuitBreakerState.Closed(currentTimestamp), stateMachine.config) // then resultingState shouldBe a[CircuitBreakerState.Open] } - it should "go straight to half open after surpasing failure threshold with defined waitDurationOpenState = 0" in supervised { + it should "go straight to half open after surpassing failure threshold with defined waitDurationOpenState = 0" in supervised { // given val config = defaultConfig.copy(waitDurationOpenState = FiniteDuration(0, TimeUnit.MILLISECONDS)) val stateMachine = CircuitBreakerStateMachine(config) @@ -78,7 +78,7 @@ class CircuitBreakerStateMachineTest extends AnyFlatSpec with Matchers: resultingState shouldBe a[CircuitBreakerState.Open] } - it should "update counter of completed operations in halfopen state" in supervised { + it should "update counter of completed operations in halfOpen state" in supervised { // given val config = defaultConfig val stateMachine = CircuitBreakerStateMachine(config) diff --git a/core/src/test/scala/ox/resilience/CircuitBreakerTest.scala b/core/src/test/scala/ox/resilience/CircuitBreakerTest.scala index 92de4e2b..7ebc5c31 100644 --- a/core/src/test/scala/ox/resilience/CircuitBreakerTest.scala +++ b/core/src/test/scala/ox/resilience/CircuitBreakerTest.scala @@ -40,7 +40,7 @@ class CircuitBreakerTest extends AnyFlatSpec with Matchers with OptionValues wit result2.value.value shouldBe "success" } - it should "drop operation after exceeding fauilure threshold" in supervised { + it should "drop operation after exceeding failure threshold" in supervised { // given val thresholdRate 
= PercentageThreshold(100) val numberOfOperations = 1 @@ -94,7 +94,7 @@ class CircuitBreakerTest extends AnyFlatSpec with Matchers with OptionValues wit behavior of "Circuit Breaker scheduled state changes" - it should "switch to halfopen after configured time" in supervised { + it should "switch to halfOpen after configured time" in supervised { // given val thresholdRate = PercentageThreshold(100) val numberOfOperations = 1 @@ -256,7 +256,7 @@ class CircuitBreakerTest extends AnyFlatSpec with Matchers with OptionValues wit sleep(500.millis) circuitBreaker.runOrDropEither(Right("c")).discard sleep(100.millis) // wait for state to register - // Should go back to closed, we have one succesful operation + // Should go back to closed, we have one successful operation circuitBreaker.stateMachine.state shouldBe a[CircuitBreakerState.Closed] } diff --git a/doc/utils/circuit-breaker.md b/doc/utils/circuit-breaker.md index 0a342c9b..c9356cd3 100644 --- a/doc/utils/circuit-breaker.md +++ b/doc/utils/circuit-breaker.md @@ -1,5 +1,9 @@ # Circuit Breaker +A circuit breaker is used to provide stability and prevent cascading failures in distributed systems. +These should be used with other mechanisms (such as timeouts or rate limiters) to prevent the failure of a single component from bringing down all components. +The Circuit Breaker can proactively identify unresponsive services and prevent repeated attempts. + The circuit breaker allows controlling execution of operations and stops if certain condition are met. CircuitBreaker is thread-safe and can be used in concurrent scenarios. ## API @@ -17,6 +21,12 @@ supervised: val operationResult: Option[T] = circuitBreaker.runOrDrop(operation) ``` +The CircuitBreaker is a finite state machine with three states: `Closed`, `Open` and `HalfOpen`. +- While in `Open` state - all calls are dropped. +- In `Closed` state - calls are accepted. 
+- In `HalfOpen` state - only configured number of call can be started and depending on their results state can go back to `Open` or `Closed`. See [conditions for state change](#conditions-for-state-change). + + ## Configuration Many config parameters relate to calculated metrics. Those metrics are percentage of calls that failed and percentage of calls that exceeded `slowCallDurationThreshold`. @@ -31,12 +41,12 @@ There are two ways that metrics are calculated. ### Failure rate and slow call rate thresholds -The state of the CircuitBreaker changes from `Closed` to `Open` when the `failureRate` is greater or equal to configurable threshold. For example when 80% of recorded call results failed. +The state of the CircuitBreaker changes from `Closed` to `Open` when the failure rate is greater or equal to configurable threshold. For example when 80% of recorded call results failed. Failures are counted based on provided `ErrorMode`. For example any exception that is thrown by the operation, when using the direct, "unwrapped" API or any `Left` variant when using `runOrDropEither`. The same state change also happen when percentage of slow calls (exceeding configurable `slowCallDurationThreshold`) is equal or greater than configured threshold. For example 80% of calls took longer then 10 seconds. -Those metrics are considered only when number of recorder calls is greater or equal to `minimumNumberOfCalls`, otherwise we don't change state even if `failureRate` is 100%. +Those metrics are considered only when number of recorder calls is greater or equal to `minimumNumberOfCalls`, otherwise we don't change state even if failure rate is 100%. ### Parameters @@ -52,18 +62,47 @@ Those metrics are considered only when number of recorder calls is greater or eq `SlidingWindow` variants: - `CountBased(windowSize: Int)` - This variant calculates metrics based on last n results of calls recorded. These statistics are cleared on every state change. 
-- `TimeBased(duration: FiniteDuration)` - This variant calculates metrics of operations in the lapse of `duraiton` before current time. These statistics are cleared on every state change. +- `TimeBased(duration: FiniteDuration)` - This variant calculates metrics of operations in the lapse of `duration` before current time. These statistics are cleared on every state change. + +### Providing configuration + +CircuitBreaker can be configured during instantiation by providing `CircuitBreakerConfig`. + +```scala mdoc:compile-only +import ox.supervised +import ox.resilience.* +import scala.concurrent.duration.* + +supervised: + // using default config + CircuitBreaker(CircuitBreakerConfig.default) + + // custom config + val config = CircuitBreakerConfig( + failureRateThreshold = PercentageThreshold(50), + slowCallThreshold = PercentageThreshold(50), + slowCallDurationThreshold = 10.seconds, + slidingWindow = SlidingWindow.CountBased(100), + minimumNumberOfCalls = 20, + waitDurationOpenState = 10.seconds, + halfOpenTimeoutDuration = 0.millis, + numberOfCallsInHalfOpenState = 10 + ) + + // providing config for CircuitBreaker instance + CircuitBreaker(config) +``` Values defined in `CircuitBreakerConfig.default`: ``` failureRateThreshold = PercentageThreshold(50) slowCallThreshold = PercentageThreshold(50) -slowCallDurationThreshold = 60.seconds +slowCallDurationThreshold = 10.seconds slidingWindow = SlidingWindow.CountBased(100) minimumNumberOfCalls = 20 -waitDurationOpenState = FiniteDuration(10, TimeUnit.SECONDS) -halfOpenTimeoutDuration = FiniteDuration(0, TimeUnit.MILLISECONDS) +waitDurationOpenState = 10.seconds, +halfOpenTimeoutDuration = 0.millis, numberOfCallsInHalfOpenState = 10 ``` @@ -79,7 +118,7 @@ numberOfCallsInHalfOpenState = 10 ```{note} -CircuitBreaker uses actor internally and since actor executes on one thread this may be bottleneck. 
That means that calculating state change can be deleyad and breaker can let few more operations to complete before openning. +CircuitBreaker uses actor internally and since actor executes on one thread this may be bottleneck. That means that calculating state change can be delayed and breaker can let few more operations to complete before opening. This can be the case with many very fast operations. ``` From b5f27fa1b173de4731f8947512e9668d1407d1d0 Mon Sep 17 00:00:00 2001 From: adamw <adam@warski.org> Date: Tue, 28 Jan 2025 11:46:36 +0100 Subject: [PATCH 26/26] Release 0.5.9 --- README.md | 4 +- .../out/_static/state-diagram-cb.svg | 1 + generated-doc/out/index.md | 4 +- generated-doc/out/info/dependency.md | 4 +- generated-doc/out/integrations/cron4s.md | 67 ++++++++ generated-doc/out/integrations/kafka.md | 2 +- generated-doc/out/integrations/mdc-logback.md | 2 +- generated-doc/out/streaming/flows.md | 2 +- generated-doc/out/utils/circuit-breaker.md | 153 ++++++++++++++++++ generated-doc/out/utils/retries.md | 2 +- 10 files changed, 232 insertions(+), 9 deletions(-) create mode 100644 generated-doc/out/_static/state-diagram-cb.svg create mode 100644 generated-doc/out/integrations/cron4s.md create mode 100644 generated-doc/out/utils/circuit-breaker.md diff --git a/README.md b/README.md index 0001b7d5..620f052d 100644 --- a/README.md +++ b/README.md @@ -23,13 +23,13 @@ the project! To test Ox, use the following dependency, using either [sbt](https://www.scala-sbt.org): ```scala -"com.softwaremill.ox" %% "core" % "0.5.8" +"com.softwaremill.ox" %% "core" % "0.5.9" ``` Or [scala-cli](https://scala-cli.virtuslab.org): ```scala -//> using dep "com.softwaremill.ox::core:0.5.8" +//> using dep "com.softwaremill.ox::core:0.5.9" ``` Documentation is available at [https://ox.softwaremill.com](https://ox.softwaremill.com), ScalaDocs can be browsed at [https://javadoc.io](https://www.javadoc.io/doc/com.softwaremill.ox). 
diff --git a/generated-doc/out/_static/state-diagram-cb.svg b/generated-doc/out/_static/state-diagram-cb.svg new file mode 100644 index 00000000..792b7545 --- /dev/null +++ b/generated-doc/out/_static/state-diagram-cb.svg @@ -0,0 +1 @@ +<svg aria-roledescription="stateDiagram" role="graphics-document document" viewBox="0 0 364.51666259765625 169" style="max-width: 364.51666259765625px;" class="statediagram" xmlns="http://www.w3.org/2000/svg" width="100%" id="export-svg"><style xmlns="http://www.w3.org/1999/xhtml">@import url("https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.2.0/css/all.min.css"); p {margin: 0;}</style><style>#export-svg{font-family:"trebuchet ms",verdana,arial,sans-serif;font-size:14px;fill:#333;}#export-svg .error-icon{fill:#ffffff;}#export-svg .error-text{fill:#000000;stroke:#000000;}#export-svg .edge-thickness-normal{stroke-width:1px;}#export-svg .edge-thickness-thick{stroke-width:3.5px;}#export-svg .edge-pattern-solid{stroke-dasharray:0;}#export-svg .edge-thickness-invisible{stroke-width:0;fill:none;}#export-svg .edge-pattern-dashed{stroke-dasharray:3;}#export-svg .edge-pattern-dotted{stroke-dasharray:2;}#export-svg .marker{fill:#000000;stroke:#000000;}#export-svg .marker.cross{stroke:#000000;}#export-svg svg{font-family:"trebuchet ms",verdana,arial,sans-serif;font-size:14px;}#export-svg p{margin:0;}#export-svg defs #statediagram-barbEnd{fill:#000000;stroke:#000000;}#export-svg g.stateGroup text{fill:#000000;stroke:none;font-size:10px;}#export-svg g.stateGroup text{fill:#333;stroke:none;font-size:10px;}#export-svg g.stateGroup .state-title{font-weight:bolder;fill:#333;}#export-svg g.stateGroup rect{fill:#ffffff;stroke:#000000;}#export-svg g.stateGroup line{stroke:#000000;stroke-width:1;}#export-svg .transition{stroke:#000000;stroke-width:1;fill:none;}#export-svg .stateGroup .composit{fill:#ffffff;border-bottom:1px;}#export-svg .stateGroup .alt-composit{fill:#e0e0e0;border-bottom:1px;}#export-svg .state-note{stroke:hsl(52.6829268293, 60%, 
73.9215686275%);fill:#fff5ad;}#export-svg .state-note text{fill:#333;stroke:none;font-size:10px;}#export-svg .stateLabel .box{stroke:none;stroke-width:0;fill:#ffffff;opacity:0.5;}#export-svg .edgeLabel .label rect{fill:#ffffff;opacity:0.5;}#export-svg .edgeLabel{background-color:hsl(-120, 0%, 80%);text-align:center;}#export-svg .edgeLabel p{background-color:hsl(-120, 0%, 80%);}#export-svg .edgeLabel rect{opacity:0.5;background-color:hsl(-120, 0%, 80%);fill:hsl(-120, 0%, 80%);}#export-svg .edgeLabel .label text{fill:#333;}#export-svg .label div .edgeLabel{color:#333;}#export-svg .stateLabel text{fill:#333;font-size:10px;font-weight:bold;}#export-svg .node circle.state-start{fill:#000000;stroke:#000000;}#export-svg .node .fork-join{fill:#000000;stroke:#000000;}#export-svg .node circle.state-end{fill:#ffffff;stroke:#ffffff;stroke-width:1.5;}#export-svg [data-look="neo"].node circle.state-end{filter:none;stroke:#ffffff;fill:#000000;}#export-svg .end-state-inner{fill:#ffffff;stroke:#ffffff;stroke-width:1.5;}#export-svg .node rect{fill:#ffffff;stroke:#000000;stroke-width:1px;}#export-svg .node-rect-neo{fill:#ffffff;stroke:none;stroke-width:1px;}#export-svg .node polygon{fill:#ffffff;stroke:#000000;stroke-width:1px;}#export-svg #statediagram-barbEnd{fill:#000000;}#export-svg .statediagram-cluster rect{fill:#ffffff;stroke:#000000;stroke-width:1px;}#export-svg .cluster-label,#export-svg .nodeLabel{color:#333;line-height:1.0;}#export-svg .statediagram-cluster rect.outer{rx:3px;ry:3px;}#export-svg .statediagram-state .divider{stroke:#000000;}#export-svg .statediagram-state .title-state{rx:3px;ry:3px;}#export-svg .statediagram-cluster.statediagram-cluster .inner{fill:#ffffff;}#export-svg .statediagram-cluster.statediagram-cluster-alt .inner{fill:#f0f0f0;}#export-svg .statediagram-cluster .inner{rx:0;ry:0;}#export-svg .statediagram-state rect.basic{rx:3px;ry:3px;}#export-svg .state-shadow-neo{filter:drop-shadow( 0px 1px 2px rgba(0, 0, 0, 0.25));}#export-svg .statediagram-state 
rect.divider{stroke-dasharray:10,10;fill:#f0f0f0;}#export-svg .note-edge{stroke-dasharray:5;}#export-svg .statediagram-note rect{fill:#fff5ad;stroke:hsl(52.6829268293, 60%, 73.9215686275%);stroke-width:1px;rx:0;ry:0;}#export-svg .statediagram-note rect{fill:#fff5ad;stroke:hsl(52.6829268293, 60%, 73.9215686275%);stroke-width:1px;rx:0;ry:0;}#export-svg .statediagram-note text{fill:#333;}#export-svg .statediagram-note .nodeLabel{color:#333;}#export-svg .node.statediagram-note rect{stroke:hsl(52.6829268293, 60%, 73.9215686275%)!important;}#export-svg .statediagram .edgeLabel{color:red;}#export-svg #dependencyStart,#export-svg #dependencyEnd{fill:#000000;stroke:#000000;stroke-width:1;}#export-svg .statediagramTitleText{text-anchor:middle;font-size:18px;fill:#333;}#export-svg .node .neo-node{stroke:#000000;}#export-svg [data-look="neo"].node rect,#export-svg [data-look="neo"].cluster rect,#export-svg [data-look="neo"].node polygon{stroke:url(#export-svg-gradient);filter:drop-shadow( 0px 1px 2px rgba(0, 0, 0, 0.25));}#export-svg [data-look="neo"].node rect,#export-svg [data-look="neo"].node polygon,#export-svg [data-look="neo"].node path{stroke:url(#export-svg-gradient);filter:drop-shadow( 0px 1px 2px rgba(0, 0, 0, 0.25));}#export-svg [data-look="neo"].node .neo-line path{stroke:hsl(0, 0%, 70%);filter:none;}#export-svg [data-look="neo"].node circle{stroke:url(#export-svg-gradient);filter:drop-shadow( 0px 1px 2px rgba(0, 0, 0, 0.25));}#export-svg [data-look="neo"].node circle .state-start{fill:#000000;}#export-svg [data-look="neo"].statediagram-cluster rect{fill:#ffffff;stroke:url(#export-svg-gradient);stroke-width:1px;}#export-svg [data-look="neo"].icon-shape .icon{fill:url(#export-svg-gradient);filter:drop-shadow( 0px 1px 2px rgba(0, 0, 0, 0.25));}#export-svg [data-look="neo"].icon-shape .icon-neo path{stroke:url(#export-svg-gradient);filter:drop-shadow( 0px 1px 2px rgba(0, 0, 0, 0.25));}#export-svg :root{--mermaid-font-family:"trebuchet 
ms",verdana,arial,sans-serif;}#export-svg .red>*{fill:red!important;}#export-svg .red span{fill:red!important;}#export-svg .green>*{fill:green!important;}#export-svg .green span{fill:green!important;}#export-svg .yellow>*{fill:yellow!important;}#export-svg .yellow span{fill:yellow!important;}</style><g><defs><marker orient="auto" markerUnits="strokeWidth" markerHeight="14" markerWidth="20" refY="7" refX="19" id="export-svg_stateDiagram-barbEnd"><path d="M 19,7 L11,14 L13,7 L11,0 Z"/></marker></defs><g class="root"><g class="clusters"/><g class="edgePaths"><path marker-end="url(#export-svg_stateDiagram-barbEnd)" data-points="W3sieCI6ODMuNTgzMzI4MjQ3MDcwMzEsInkiOjg0LjV9LHsieCI6MTEyLjQ3NDk5ODQ3NDEyMTEsInkiOjg0LjV9LHsieCI6MTQxLjM2NjY2ODcwMTE3MTg4LCJ5Ijo4NC41fV0=" data-id="edge0" data-et="edge" data-edge="true" style=";fill:none" class="edge-thickness-normal edge-pattern-solid transition" id="edge0" d="M83.58332824707031,84.5L112.4749984741211,84.5L126.92083358764648,84.5L141.36666870117188,84.5"/><path marker-end="url(#export-svg_stateDiagram-barbEnd)" data-points="W3sieCI6NjAuOTQ2OTY3Mzg1MDMxOTYsInkiOjY5LjV9LHsieCI6MTEyLjQ3NDk5ODQ3NDEyMTEsInkiOjE4LjV9LHsieCI6MTc0LjQ4MzMzNzQwMjM0Mzc1LCJ5IjoxOC41fSx7IngiOjIzNi40OTE2NzYzMzA1NjY0LCJ5IjoxOC41fSx7IngiOjI5NC4wMjc2NjMxNDQxOTgzNiwieSI6NjkuNX1d" data-id="edge1" data-et="edge" data-edge="true" style=";fill:none" class="edge-thickness-normal edge-pattern-solid transition" id="edge1" d="M60.94696738503196,69.5L103.13062351459659,27.748618914078016Q112.4749984741211,18.5 125.62240488002602,18.5L174.48333740234375,18.5L222.39646232476852,18.5Q236.4916763305664,18.5 247.03958697655486,27.84968656552104L265.2596697373824,44L294.02766314419836,69.5"/><path marker-end="url(#export-svg_stateDiagram-barbEnd)" data-points="W3sieCI6MjA3LjYwMDAwNjEwMzUxNTYyLCJ5Ijo3My41NTE2MDYyODIxODI5NX0seyJ4IjoyMzYuNDkxNjc2MzMwNTY2NCwieSI6NjR9LHsieCI6MjY1LjM4MzM0NjU1NzYxNzIsInkiOjcxLjk1NDUwNTQ2NTEzOTYyfV0=" data-id="edge2" data-et="edge" data-edge="true" 
style=";fill:none" class="edge-thickness-normal edge-pattern-solid transition" id="edge2" d="M207.60000610351562,73.55160628218295L229.3786410670248,66.35157440794201Q236.4916763305664,64 243.7145938873291,65.9886263662849L250.9375114440918,67.97725273256981L265.3833465576172,71.95450546513962"/><path marker-end="url(#export-svg_stateDiagram-barbEnd)" data-points="W3sieCI6MjY1LjM4MzM0NjU1NzYxNzIsInkiOjk3LjA0NTQ5NDUzNDg2MDM4fSx7IngiOjIzNi40OTE2NzYzMzA1NjY0LCJ5IjoxMDV9LHsieCI6MjA3LjYwMDAwNjEwMzUxNTYyLCJ5Ijo5NS40NDgzOTM3MTc4MTcwNX1d" data-id="edge3" data-et="edge" data-edge="true" style=";fill:none" class="edge-thickness-normal edge-pattern-solid transition" id="edge3" d="M265.3833465576172,97.04549453486038L243.8261736441387,102.98065329881007Q236.4916763305664,105 229.2687587738037,102.61209842945426L222.04584121704102,100.22419685890853L207.60000610351562,95.44839371781705"/><path marker-end="url(#export-svg_stateDiagram-barbEnd)" data-points="W3sieCI6Mjk0LjAyNzY2MzE0NDE5ODM2LCJ5Ijo5OS41fSx7IngiOjIzNi40OTE2NzYzMzA1NjY0LCJ5IjoxNTAuNX0seyJ4IjoxNzQuNDgzMzM3NDAyMzQzNzUsInkiOjE1MC41fSx7IngiOjExMi40NzQ5OTg0NzQxMjExLCJ5IjoxNTAuNX0seyJ4Ijo2MC45NDY5NjczODUwMzE5NiwieSI6OTkuNX1d" data-id="edge4" data-et="edge" data-edge="true" style=";fill:none" class="edge-thickness-normal edge-pattern-solid transition" id="edge4" d="M294.02766314419836,99.5L247.03958697655483,141.15031343447896Q236.4916763305664,150.5 222.39646232476852,150.5L174.48333740234375,150.5L125.62240488002602,150.5Q112.4749984741211,150.5 103.13062351459659,141.251381085922L86.71098292957653,125L60.94696738503196,99.5"/></g><g class="edgeLabels"><g transform="translate(112.4749984741211, 84.5)" class="edgeLabel"><g transform="translate(-3.8916702270507812, -10.5)" data-id="edge0" class="label"><foreignObject height="21" width="7.7833404541015625"><div class="labelBkg" xmlns="http://www.w3.org/1999/xhtml" style="display: table-cell; white-space: normal; line-height: 1.5; max-width: 200px; text-align: center;"><span 
class="edgeLabel"><p>1</p></span></div></foreignObject></g></g><g transform="translate(174.48333740234375, 18.5)" class="edgeLabel"><g transform="translate(-3.8916702270507812, -10.5)" data-id="edge1" class="label"><foreignObject height="21" width="7.7833404541015625"><div class="labelBkg" xmlns="http://www.w3.org/1999/xhtml" style="display: table-cell; white-space: normal; line-height: 1.5; max-width: 200px; text-align: center;"><span class="edgeLabel"><p>2</p></span></div></foreignObject></g></g><g transform="translate(236.4916763305664, 64)" class="edgeLabel"><g transform="translate(-3.8916702270507812, -10.5)" data-id="edge2" class="label"><foreignObject height="21" width="7.7833404541015625"><div class="labelBkg" xmlns="http://www.w3.org/1999/xhtml" style="display: table-cell; white-space: normal; line-height: 1.5; max-width: 200px; text-align: center;"><span class="edgeLabel"><p>3</p></span></div></foreignObject></g></g><g transform="translate(236.4916763305664, 105)" class="edgeLabel"><g transform="translate(-3.8916702270507812, -10.5)" data-id="edge3" class="label"><foreignObject height="21" width="7.7833404541015625"><div class="labelBkg" xmlns="http://www.w3.org/1999/xhtml" style="display: table-cell; white-space: normal; line-height: 1.5; max-width: 200px; text-align: center;"><span class="edgeLabel"><p>4</p></span></div></foreignObject></g></g><g transform="translate(174.48333740234375, 150.5)" class="edgeLabel"><g transform="translate(-3.8916702270507812, -10.5)" data-id="edge4" class="label"><foreignObject height="21" width="7.7833404541015625"><div class="labelBkg" xmlns="http://www.w3.org/1999/xhtml" style="display: table-cell; white-space: normal; line-height: 1.5; max-width: 200px; text-align: center;"><span class="edgeLabel"><p>5</p></span></div></foreignObject></g></g></g><g class="nodes"><g transform="translate(45.791664123535156, 84.5)" data-look="neo" data-et="node" data-node="true" data-id="Closed" id="state-Closed-4" class="node green 
statediagram-state"><rect stroke="url(#gradient)" height="30" width="75.58332824707031" y="-15" x="-37.791664123535156" ry="3" data-id="Closed" rx="3" style="fill:green !important" class="basic label-container"/><g transform="translate(-21.791664123535156, -7)" style="" class="label"><rect/><foreignObject height="14" width="43.58332824707031"><div xmlns="http://www.w3.org/1999/xhtml" style="display: table-cell; white-space: normal; line-height: 1.5; max-width: 200px; text-align: center;"><span class="nodeLabel"><p>Closed</p></span></div></foreignObject></g></g><g transform="translate(174.48333740234375, 84.5)" data-look="neo" data-et="node" data-node="true" data-id="Open" id="state-Open-3" class="node red statediagram-state"><rect stroke="url(#gradient)" height="30" width="66.23333740234375" y="-15" x="-33.116668701171875" ry="3" data-id="Open" rx="3" style="fill:red !important" class="basic label-container"/><g transform="translate(-17.116668701171875, -7)" style="" class="label"><rect/><foreignObject height="14" width="34.23333740234375"><div xmlns="http://www.w3.org/1999/xhtml" style="display: table-cell; white-space: normal; line-height: 1.5; max-width: 200px; text-align: center;"><span class="nodeLabel"><p>Open</p></span></div></foreignObject></g></g><g transform="translate(310.95001220703125, 84.5)" data-look="neo" data-et="node" data-node="true" data-id="HalfOpen" id="state-HalfOpen-4" class="node yellow statediagram-state"><rect stroke="url(#gradient)" height="30" width="91.13333129882812" y="-15" x="-45.56666564941406" ry="3" data-id="HalfOpen" rx="3" style="fill:yellow !important" class="basic label-container"/><g transform="translate(-29.566665649414062, -7)" style="" class="label"><rect/><foreignObject height="14" width="59.133331298828125"><div xmlns="http://www.w3.org/1999/xhtml" style="display: table-cell; white-space: normal; line-height: 1.5; max-width: 200px; text-align: center;"><span 
class="nodeLabel"><p>HalfOpen</p></span></div></foreignObject></g></g></g></g></g><linearGradient y2="0%" x2="100%" y1="0%" x1="0%" gradientUnits="objectBoundingBox" id="export-svg-gradient"><stop stop-opacity="1" stop-color="#0042eb" offset="0%"/><stop stop-opacity="1" stop-color="#eb0042" offset="100%"/></linearGradient></svg> \ No newline at end of file diff --git a/generated-doc/out/index.md b/generated-doc/out/index.md index 171010ad..815796e5 100644 --- a/generated-doc/out/index.md +++ b/generated-doc/out/index.md @@ -2,7 +2,7 @@ Safe direct-style concurrency and resiliency for Scala on the JVM. Requires JDK 21 & Scala 3. -To start using Ox, add the `com.softwaremill.ox::core:0.5.8` [dependency](info/dependency.md) to your project. +To start using Ox, add the `com.softwaremill.ox::core:0.5.9` [dependency](info/dependency.md) to your project. Then, take a look at the tour of Ox, or follow one of the topics listed in the menu to get to know Ox's API! In addition to this documentation, ScalaDocs can be browsed at [https://javadoc.io](https://www.javadoc.io/doc/com.softwaremill.ox). @@ -75,6 +75,7 @@ In addition to this documentation, ScalaDocs can be browsed at [https://javadoc. utils/resources utils/control-flow utils/actors + utils/circuit-breaker utils/utility .. toctree:: @@ -83,6 +84,7 @@ In addition to this documentation, ScalaDocs can be browsed at [https://javadoc. integrations/kafka integrations/mdc-logback + integrations/cron4s .. 
toctree:: :maxdepth: 2 diff --git a/generated-doc/out/info/dependency.md b/generated-doc/out/info/dependency.md index 941f290c..9cf717cb 100644 --- a/generated-doc/out/info/dependency.md +++ b/generated-doc/out/info/dependency.md @@ -4,10 +4,10 @@ To use ox core in your project, add: ```scala // sbt dependency -"com.softwaremill.ox" %% "core" % "0.5.8" +"com.softwaremill.ox" %% "core" % "0.5.9" // scala-cli dependency -//> using dep com.softwaremill.ox::core:0.5.8 +//> using dep com.softwaremill.ox::core:0.5.9 ``` Ox core depends only on the Java [jox](https://github.com/softwaremill/jox) project, where channels are implemented. There are no other direct or transitive dependencies. diff --git a/generated-doc/out/integrations/cron4s.md b/generated-doc/out/integrations/cron4s.md new file mode 100644 index 00000000..1e53f7ce --- /dev/null +++ b/generated-doc/out/integrations/cron4s.md @@ -0,0 +1,67 @@ +# Cron scheduler + +Dependency: + +```scala +"com.softwaremill.ox" %% "cron" % "0.5.9" +``` + +This module allows to run schedules based on cron expressions from [cron4s](https://github.com/alonsodomin/cron4s). + +`CronSchedule` can be used in all places that requires `Schedule` especially in repeat scenarios. + +For defining `CronExpr` see [cron4s documentation](https://www.alonsodomin.me/cron4s/userguide/index.html). + +## Api + +The cron module exposes methods for creating `Schedule` based on `CronExpr`. + +```scala +import ox.scheduling.cron.* +import cron4s.* + +repeat(RepeatConfig(CronSchedule.unsafeFromString("10-35 2,4,6 * ? * *")))(operation) +``` + +## Operation definition + +Methods from `ox.scheduling.cron.CronSchedule` define `Schedule`, so they can be plugged into `RepeatConfig` and used with `repeat` API. + +## Configuration + +All configuration beyond `CronExpr` is provided by the `repeat` API. 
If an error handling within the operation +is needed, you can use a `retry` inside it (see an example below) or use `scheduled` with `CronSchedule` instead of `repeat`, which allows +full customization. + +## Examples + +```scala +import ox.UnionMode +import ox.scheduling.cron.CronSchedule +import scala.concurrent.duration.* +import ox.resilience.{RetryConfig, retry} +import ox.scheduling.* +import cron4s.* + +def directOperation: Int = ??? +def eitherOperation: Either[String, Int] = ??? +def unionOperation: String | Int = ??? + +val cronExpr: CronExpr = Cron.unsafeParse("10-35 2,4,6 * ? * *") + +// various operation definitions - same syntax +repeat(RepeatConfig(CronSchedule.fromCronExpr(cronExpr)))(directOperation) +repeatEither(RepeatConfig(CronSchedule.fromCronExpr(cronExpr)))(eitherOperation) + +// infinite repeats with a custom strategy +def customStopStrategy: Int => Boolean = ??? +repeat(RepeatConfig(CronSchedule.fromCronExpr(cronExpr), customStopStrategy))(directOperation) + +// custom error mode +repeatWithErrorMode(UnionMode[String])(RepeatConfig(CronSchedule.fromCronExpr(cronExpr)))(unionOperation) + +// repeat with retry inside +repeat(RepeatConfig(CronSchedule.fromCronExpr(cronExpr))) { + retry(RetryConfig.backoff(3, 100.millis))(directOperation) +} +``` diff --git a/generated-doc/out/integrations/kafka.md b/generated-doc/out/integrations/kafka.md index 5b85b09a..330b2c5a 100644 --- a/generated-doc/out/integrations/kafka.md +++ b/generated-doc/out/integrations/kafka.md @@ -3,7 +3,7 @@ Dependency: ```scala -"com.softwaremill.ox" %% "kafka" % "0.5.8" +"com.softwaremill.ox" %% "kafka" % "0.5.9" ``` `Flow`s which read from a Kafka topic, mapping stages and drains which publish to Kafka topics are available through diff --git a/generated-doc/out/integrations/mdc-logback.md b/generated-doc/out/integrations/mdc-logback.md index 42d570b9..f62f7458 100644 --- a/generated-doc/out/integrations/mdc-logback.md +++ b/generated-doc/out/integrations/mdc-logback.md @@ 
-3,7 +3,7 @@ Dependency: ```scala -"com.softwaremill.ox" %% "mdc-logback" % "0.5.8" +"com.softwaremill.ox" %% "mdc-logback" % "0.5.9" ``` Ox provides support for setting inheritable MDC (mapped diagnostic context) values, when using the [Logback](https://logback.qos.ch) diff --git a/generated-doc/out/streaming/flows.md b/generated-doc/out/streaming/flows.md index 42dd6f33..f29b1703 100644 --- a/generated-doc/out/streaming/flows.md +++ b/generated-doc/out/streaming/flows.md @@ -171,7 +171,7 @@ To obtain a `org.reactivestreams.Publisher` instance, you'll need to add the fol bring the `toReactiveStreamsPublisher` method into scope: ```scala -// sbt dependency: "com.softwaremill.ox" %% "flow-reactive-streams" % "0.5.8" +// sbt dependency: "com.softwaremill.ox" %% "flow-reactive-streams" % "0.5.9" import ox.supervised import ox.flow.Flow diff --git a/generated-doc/out/utils/circuit-breaker.md b/generated-doc/out/utils/circuit-breaker.md new file mode 100644 index 00000000..110cced7 --- /dev/null +++ b/generated-doc/out/utils/circuit-breaker.md @@ -0,0 +1,153 @@ +# Circuit Breaker + +A circuit breaker is used to provide stability and prevent cascading failures in distributed systems. +These should be used with other mechanisms (such as timeouts or rate limiters) to prevent the failure of a single component from bringing down all components. +The Circuit Breaker can proactively identify unresponsive services and prevent repeated attempts. + +The circuit breaker allows controlling execution of operations and stops if certain condition are met. CircuitBreaker is thread-safe and can be used in concurrent scenarios. + +## API + +```scala +import ox.supervised +import ox.resilience.* + +supervised: + val circuitBreaker = CircuitBreaker(CircuitBreakerConfig.default) + + type T + def operation: T = ??? + + val operationResult: Option[T] = circuitBreaker.runOrDrop(operation) +``` + +The CircuitBreaker is a finite state machine with three states: `Closed`, `Open` and `HalfOpen`. 
+- While in `Open` state - all calls are dropped. +- In `Closed` state - calls are accepted. +- In `HalfOpen` state - only configured number of calls can be started and depending on their results state can go back to `Open` or `Closed`. See [conditions for state change](#conditions-for-state-change). + + +## Configuration + +Many config parameters relate to calculated metrics. Those metrics are percentage of calls that failed and percentage of calls that exceeded `slowCallDurationThreshold`. +Which calls are included during calculation of these metrics is determined by `SlidingWindow` configuration. + +### Sliding window + +There are two ways that metrics are calculated. + +- Count based sliding window - `SlidingWindow.CountBased`, counts metrics based on last n call results. +- Time based sliding window - `SlidingWindow.TimeBased`, counts metrics based on call results recorded in the lapse of duration before current time. + +### Failure rate and slow call rate thresholds + +The state of the CircuitBreaker changes from `Closed` to `Open` when the failure rate is greater or equal to configurable threshold. For example when 80% of recorded call results failed. +Failures are counted based on provided `ErrorMode`. For example any exception that is thrown by the operation, when using the direct, "unwrapped" API or any `Left` variant when using `runOrDropEither`. + +The same state change also happens when percentage of slow calls (exceeding configurable `slowCallDurationThreshold`) is equal or greater than configured threshold. For example 80% of calls took longer than 10 seconds. + +Those metrics are considered only when number of recorded calls is greater or equal to `minimumNumberOfCalls`, otherwise we don't change state even if failure rate is 100%. + +### Parameters + +- `failureRateThreshold: PercentageThreshold` - percentage of recorded calls marked as failed required to switch to open state. 
+- `slowCallThreshold: PercentageThreshold` - percentage of recorded calls marked as slow required to switch to open state. +- `slowCallDurationThreshold: FiniteDuration` - duration that call has to exceed to be marked as slow. +- `slidingWindow: SlidingWindow` - mechanism to determine how calls are recorded. +- `minimumNumberOfCalls: Int` - minimum number of calls recorded needed for breaker to be able to switch to open state based on thresholds. +- `waitDurationOpenState: FiniteDuration` - duration that CircuitBreaker will wait before switching from `Open` state to `HalfOpen`. +- `halfOpenTimeoutDuration: FiniteDuration` - timeout for `HalfOpen` state after which, if not enough calls were recorded, breaker will go back to `Open` state. Zero means there is no timeout. +- `numberOfCallsInHalfOpenState: Int` - number of calls recorded in `HalfOpen` state needed to calculate metrics to decide if breaker should go back to `Open` state or `Closed`. It is also the maximum number of operations that can be started in this state. + +`SlidingWindow` variants: + +- `CountBased(windowSize: Int)` - This variant calculates metrics based on last n results of calls recorded. These statistics are cleared on every state change. +- `TimeBased(duration: FiniteDuration)` - This variant calculates metrics of operations in the lapse of `duration` before current time. These statistics are cleared on every state change. + +### Providing configuration + +CircuitBreaker can be configured during instantiation by providing `CircuitBreakerConfig`. 
+ +```scala +import ox.supervised +import ox.resilience.* +import scala.concurrent.duration.* + +supervised: + // using default config + CircuitBreaker(CircuitBreakerConfig.default) + + // custom config + val config = CircuitBreakerConfig( + failureRateThreshold = PercentageThreshold(50), + slowCallThreshold = PercentageThreshold(50), + slowCallDurationThreshold = 10.seconds, + slidingWindow = SlidingWindow.CountBased(100), + minimumNumberOfCalls = 20, + waitDurationOpenState = 10.seconds, + halfOpenTimeoutDuration = 0.millis, + numberOfCallsInHalfOpenState = 10 + ) + + // providing config for CircuitBreaker instance + CircuitBreaker(config) +``` + +Values defined in `CircuitBreakerConfig.default`: + +``` +failureRateThreshold = PercentageThreshold(50) +slowCallThreshold = PercentageThreshold(50) +slowCallDurationThreshold = 10.seconds +slidingWindow = SlidingWindow.CountBased(100) +minimumNumberOfCalls = 20 +waitDurationOpenState = 10.seconds +halfOpenTimeoutDuration = 0.millis +numberOfCallsInHalfOpenState = 10 +``` + +## Conditions for state change + + + +1. State changes from `Closed` to `Open` after any threshold was exceeded (`failureRateThreshold` or `slowCallThreshold`) and number of recorded calls is equal or greater than `minimumNumberOfCalls`. +2. State changes from `Closed` to `HalfOpen` if any threshold was exceeded, number of recorded calls is equal or greater than `minimumNumberOfCalls` and `waitDurationOpenState` is zero. +3. State changes from `Open` to `HalfOpen` when `waitDurationOpenState` passes. +4. State changes from `HalfOpen` to `Open` if `halfOpenTimeoutDuration` passes without enough calls recorded or number of recorded calls is equal to `numberOfCallsInHalfOpenState` and any threshold was exceeded. +5. State changes from `HalfOpen` to `Closed` if `numberOfCallsInHalfOpenState` calls were completed before timeout and no threshold was exceeded. 
+ + +```{note} +CircuitBreaker uses an actor internally, and since an actor executes on one thread, this may be a bottleneck. That means that calculating a state change can be delayed and the breaker can let a few more operations complete before opening. +This can be the case with many very fast operations. +``` + +## Examples + +```scala +import ox.UnionMode +import ox.supervised +import ox.resilience.* +import scala.concurrent.duration.* + +def directOperation: Int = ??? +def eitherOperation: Either[String, Int] = ??? +def unionOperation: String | Int = ??? + +supervised: + val circuitBreaker = CircuitBreaker(CircuitBreakerConfig.default) + + // various operation definitions + circuitBreaker.runOrDrop(directOperation) + circuitBreaker.runOrDropEither(eitherOperation) + + // custom error mode + circuitBreaker.runOrDropWithErrorMode(UnionMode[String])(unionOperation) + + // retry with circuit breaker inside + retryEither(RetryConfig.backoff(3, 100.millis)){ + circuitBreaker.runOrDrop(directOperation) match + case Some(value) => Right(value) + case None => Left("Operation dropped") + } +``` diff --git a/generated-doc/out/utils/retries.md b/generated-doc/out/utils/retries.md index 98b023ca..174283ee 100644 --- a/generated-doc/out/utils/retries.md +++ b/generated-doc/out/utils/retries.md @@ -160,7 +160,7 @@ Instance with default configuration can be obtained with `AdaptiveRetry.default` `retry` will attempt to retry an operation if it throws an exception; `retryEither` will additionally retry, if the result is a `Left`. Finally `retryWithErrorMode` is the most flexible, and allows retrying operations using custom failure modes (such as union types). -The methods have an additional parameter, `shouldPayPenaltyCost`, which determines if result `T` should be considered failure in terms of paying cost for retry. Penalty is paid only if it is decided to retry operation, the penalty will not be paid for successful operation. 
+The methods have an additional parameter, `shouldPayPenaltyCost`, which determines if the result `Either[E, T]` should be considered a failure in terms of paying the cost for a retry. The penalty is paid only if it is decided to retry the operation; it is not paid for a successful operation. ### Examples