# client/defaultConfig.yaml

chaosMonkeys:
  # Kills Hazelcast members. As of Feb. 2023/v0.8.0 of Hazeltest, the member killer monkey
  # can only target members running in a Kubernetes cluster, but you can choose between an
  # in-cluster access mode and an out-of-cluster access mode, see below.
  memberKiller:
    # Enables or disables the member killer monkey.
    enabled: true
    # Configures the number of runs or iterations the monkey will perform. Note that
    # number of runs != number of kills in case the chaos probability is set to something
    # less than 1.0 (100%), but the number evaluated to decide whether the monkey should
    # terminate is indeed the number of runs, and not the number of kills.
    numRuns: 100
    # In each run, the monkey will strike with this probability. The provided number expresses
    # a percentage, i.e. 1.0 corresponds to 100%. For this value to be semantically correct,
    # therefore, it must be within the closed interval [0,1].
    chaosProbability: 0.5
    # Configures how the monkey accesses the Hazelcast members it is supposed to act on. The 'mode' property
    # selects which of the mode-specific sub-objects ('k8sOutOfCluster' or 'k8sInCluster') applies.
    memberAccess:
      # Can take the value of one of its immediate object-type sub-keys, i.e. 'k8sOutOfCluster' or 'k8sInCluster'
      mode: k8sInCluster
      # Controls whether the member killer monkey should only consider active ("ready") members. If this is set to
      # true, the monkey will not kill Pods that haven't achieved readiness.
      targetOnlyActive: true
      # Mode for accessing Hazelcast members from outside the Kubernetes cluster. To connect to the Kubernetes cluster,
      # a kubeconfig file is used.
      # Handy when testing Hazeltest configurations locally prior to a deployment via Helm. However, since the absolute
      # path to the kubeconfig file can be specified by means of the corresponding property (see below), it might also
      # be used for in-cluster access by mounting the kubeconfig file into the Hazeltest Pod and specifying the path
      # accordingly.
      k8sOutOfCluster:
        # Specifies the absolute path to the kubeconfig file to be used to connect to the Kubernetes cluster
        # the target Hazelcast members run on.
        # If 'default' is given, Hazeltest will look for the kubeconfig in $HOME/.kube/config.
        # In non-default cases, the absolute path of the file to be used can be specified.
        kubeconfig: default
        # The namespace in which to search for Hazelcast member Pods.
        namespace: hazelcastplatform
        # The label selector to use for identifying the Hazelcast member Pods.
        labelSelector: app.kubernetes.io/name=hazelcastimdg
      # Mode for accessing Hazelcast members from within the Kubernetes cluster. Useful when Hazeltest itself gets
      # deployed to Kubernetes because, while member access can be achieved with out-of-cluster mode in combination
      # with mounting the kubeconfig file and specifying its path accordingly, the in-cluster mode only relies on
      # native Kubernetes RBAC artifacts to delete Hazelcast member Pods.
      # (Note that for those artifacts to be rendered by the Helm chart, the 'features.useDeletePodsServiceAccount'
      # property in the chart's values.yaml file has to be set to 'true'.)
      # Hazeltest will attempt to retrieve its current namespace by means of the POD_NAMESPACE environment variable and
      # the '/var/run/secrets/kubernetes.io/serviceaccount/namespace' file mounted into the Pod by Kubernetes. If the
      # namespace cannot be retrieved by these means, the monkey will report an error and terminate.
      # Please note that the implication of this is that, regardless of how the namespace is retrieved, the Hazeltest
      # Pod has to reside in the same namespace as the Hazelcast member Pods to be acted upon, i.e. cross-namespace
      # actions on Hazelcast member Pods are not supported.
      k8sInCluster:
        # The label selector to use for identifying the Hazelcast member Pods (same as for the out-of-cluster config).
        labelSelector: app.kubernetes.io/name=hazelcastimdg
    # Configures the monkey's sleep behavior between runs.
    sleep:
      # Whether a sleep should be performed between runs.
      enabled: true
      # The duration, in seconds, to sleep. (Mind the difference between this sleep config's duration and the various
      # runners' sleeps, where the sleep is configured in milliseconds.)
      durationSeconds: 60
      # Enables or disables randomness for this sleep configuration. If enabled, the number of seconds to sleep will be
      # determined randomly in the closed interval [0, <durationSeconds>]. If disabled, the sleep will be static, i.e.
      # the monkey will simply sleep for the given number of seconds.
      enableRandomness: false
    # Configures deletion behavior. When enabled, the member killer monkey will kill the selected Hazelcast member
    # using a grace period as defined by 'durationSeconds' and 'enableRandomness'.
    memberGrace:
      # Whether members should be terminated gracefully at all.
      enabled: true
      # Number of seconds for the graceful shutdown, if member grace has been enabled. Randomness can be enabled for
      # this, however, see below.
      durationSeconds: 30
      # Activates or deactivates randomness for the 'durationSeconds' property. The rules are the same as for the
      # sleep configuration explained above.
      enableRandomness: true

# Configures the state cleaners, one per supported data structure type (maps and queues).
# Caution: State cleaners will not modify data structures internal to Hazelcast itself. Such data structures
# start with a prefix of two underscores, and state cleaners will skip all such data structures even if
# two underscores have been explicitly provided using the prefix configuration below. It is generally advisable
# not to assign names starting with two underscores to regular payload data structures.
stateCleaner:
  # State cleaner configuration for maps.
  maps:
    # Enables or disables the state cleaner for maps.
    enabled: true
    # NOTE(review): presumably, when the prefix is enabled, only maps whose names start with the given prefix
    # are considered for cleaning -- confirm against the state cleaner implementation.
    prefix:
      enabled: true
      # Matches the default 'mapPrefix.prefix' of the map runners configured in this file.
      prefix: "ht_"
  # State cleaner configuration for queues.
  queues:
    # Enables or disables the state cleaner for queues.
    enabled: true
    # NOTE(review): presumably, when the prefix is enabled, only queues whose names start with the given prefix
    # are considered for cleaning -- confirm against the state cleaner implementation.
    prefix:
      enabled: true
      # Matches the default 'queuePrefix.prefix' of the queue runners configured in this file.
      prefix: "ht_"


queueTests:
  # 'queueTests.tweets' configures the TweetRunner. The TweetRunner has access to a file containing 500 tweets on
  # Marvel's "Avengers: Endgame" movie. This file is a simplified and shortened version of the original tweet collection,
  # which you can find here: https://www.kaggle.com/datasets/kavita5/twitter-dataset-avengersendgame
  tweets:
    # The TweetRunner will not be run when this is set to 'false'.
    enabled: true
    # The number of goroutines the TweetRunner will spawn to work on queues. (Depending on the configuration of the queue
    # names using the 'append*' properties, this may or may not correspond to a higher number of queues the runner will work on.)
    numQueues: 1
    # If set to 'true', the TweetRunner will append the goroutine index of each of the <numQueues> queues to the latter's names,
    # and consequently each of those goroutines will work on its own queue in Hazelcast. Conversely, if this is set to
    # 'false', all queue goroutines in this particular Hazeltest instance will work on the same queue (which
    # might even be the same as the queue in use by other Hazeltest instances depending on the value for the
    # 'appendClientIdToQueueName' setting).
    # In contrast to map runners such as the PokedexRunner, the TweetRunner cannot distinguish the elements written to and
    # read from a queue by different goroutines due to the nature of a queue (there is no unique key for a value; instead,
    # values are just put and polled). Therefore, different queue goroutines act on each other's elements, and if queue
    # names are identical across Hazeltest instances due to how the two 'append*' properties have been set, the TweetRunners
    # across those instances will act on each other's elements.
    appendQueueIndexToQueueName: true
    # If set to 'true', the TweetRunner will append the unique client ID of this Hazeltest instance to the names of the
    # queue or queues it spawns. This will make sure TweetRunners across Hazeltest instances all work on distinct queues,
    # thus avoiding those runners acting on each other's elements.
    appendClientIdToQueueName: false
    queuePrefix:
      # Whether the prefix given below should be put in front of the queue name.
      enabled: true
      # This prefix will be put in front of the queue name as it is without introducing any additional special characters.
      prefix: "ht_"
    # Configuration for the goroutine responsible for putting tweets into a Hazelcast queue. Each of the <numQueues>
    # goroutines will spawn one goroutine for performing put operations.
    putConfig:
      # Enable or disable put goroutine.
      enabled: true
      # The number of test loops the goroutine will perform for put operations. In each test loop, the goroutine will put
      # all tweets into a Hazelcast queue. Thus, specifying, for example, 10.000 runs means the goroutine will put
      # 10.000 * 500 tweets in total.
      # You may have noticed the semantics of specifying the number of test loops for this queue runner is different
      # compared to the map runners -- for the latter, one configures the number of test loops only once, and the test loop
      # operations (ingest, read, delete) are performed sequentially. For this queue runner, on the other hand, the queue
      # operations (put and poll) are decoupled in that they run in their own goroutines, so the ability to configure
      # the number of test loops individually for puts and polls directly translates to a higher degree of versatility
      # regarding the client behavior this queue runner can simulate.
      numRuns: 500
      # The number of put operations making up one batch. After each batch, the put goroutine performs the
      # 'betweenActionBatches' sleep configured below (if enabled).
      batchSize: 50
      sleeps:
        # Makes a put goroutine sleep once before performing the first put operation.
        initialDelay:
          enabled: false
          durationMs: 2000
          # Enables or disables randomness for this sleep configuration. If enabled and assuming the sleep itself is
          # enabled, the number of milliseconds to sleep will be determined randomly in the closed interval
          # [0, <durationMs>]. If disabled, on the other hand, the sleep will be static, i.e. the goroutine for this
          # runner will simply sleep for the given number of milliseconds.
          # Hint: For queue runners, it makes sense to disable randomness for the initial sleep, as a reduced number of
          # milliseconds slept due to randomness might invalidate the purpose of the initial sleep, which is to give the
          # operation in question, normally the put operation, enough time to put some elements into its queues before
          # the poll operations start.
          enableRandomness: false
        # Causes a put goroutine to sleep after performing one batch of put operations, where the batch size is
        # configured by means of the above '(...).putConfig.batchSize' property.
        betweenActionBatches:
          enabled: true
          durationMs: 1000
          enableRandomness: true
        # Makes a put goroutine pause execution before each run.
        betweenRuns:
          enabled: true
          durationMs: 2000
          enableRandomness: true
    # Configuration for the goroutine running poll operations for tweets against the Hazelcast queue. Just like with the
    # put operations, each of the <numQueues> goroutines will spawn a dedicated goroutine for polling operations, too.
    pollConfig:
      # Enable or disable polling goroutine.
      enabled: true
      # NOTE(review): presumably the poll-side counterparts of 'putConfig.numRuns' and 'putConfig.batchSize' -- confirm.
      numRuns: 500
      batchSize: 50
      # Same as for putConfig. Note that, by default, the initial delay is enabled (and static) for polling so the
      # put goroutine has time to add elements to the queue before polling starts.
      sleeps:
        initialDelay:
          enabled: true
          durationMs: 12500
          enableRandomness: false
        betweenActionBatches:
          enabled: true
          durationMs: 1000
          enableRandomness: true
        betweenRuns:
          enabled: true
          durationMs: 2000
          enableRandomness: true
  # 'queueTests.load' configures the queue load runner. Properties not commented here also appear on the
  # TweetRunner above; NOTE(review): presumably with the same meaning -- confirm.
  load:
    enabled: true
    numQueues: 5
    # Controls how many entries the queue load runner will use (this property is not available on the tweet runner because
    # the tweet runner works on a static data set, hence the number of elements as well as their size are given). The
    # queue load runner will create <numLoadEntries> string entries to iterate over them for each of the <numQueues> queues
    # in each of the <numRuns> runs of its put and poll configurations.
    numLoadEntries: 5000
    # Configures the size, in bytes, for the random string to be used as each load entry (to reduce Hazeltest's memory
    # footprint, only one random string of size <payloadSizeBytes> is created, and it will serve as the payload for each
    # of the <numLoadEntries> entries)
    payloadSizeBytes: 5000
    appendQueueIndexToQueueName: true
    appendClientIdToQueueName: false
    queuePrefix:
      enabled: true
      prefix: "ht_"
    putConfig:
      enabled: true
      numRuns: 500
      batchSize: 50
      sleeps:
        initialDelay:
          enabled: false
          durationMs: 2000
          enableRandomness: false
        betweenActionBatches:
          enabled: true
          durationMs: 200
          enableRandomness: true
        betweenRuns:
          enabled: true
          durationMs: 200
          enableRandomness: true
    pollConfig:
      enabled: true
      numRuns: 500
      batchSize: 50
      sleeps:
        initialDelay:
          enabled: true
          durationMs: 20000
          enableRandomness: false
        betweenActionBatches:
          enabled: true
          durationMs: 200
          enableRandomness: true
        betweenRuns:
          enabled: true
          durationMs: 200
          enableRandomness: true

mapTests:
  # 'mapTests.pokedex' configures the PokedexRunner, whose data source is the 151 Pokémon of the
  # first-generation Pokédex.
  pokedex:
    # If set to 'false', the PokedexRunner will not be executed
    enabled: true
    # The runner will spawn one goroutine for each map
    numMaps: 10
    # If set to 'true', each of the <numMaps> goroutines will use its own map name, thus effectively accessing its own map
    # In other words, if this is set to 'true', this PokedexRunner will use <numMaps> distinct maps in Hazelcast; if it is set
    # to 'false', the <numMaps> goroutines will access the same map
    # For the PokedexRunner, this will result in a higher number of maps, each containing a smaller number of keys
    # Note: Both the client ID and the currently active goroutine/number are still part of the map keys, so no matter how
    # the following two properties are set, different clients and goroutines within clients will not act on each other's keys
    appendMapIndexToMapName: true
    # If set to 'true', the PokedexRunner will append the ID of its Hazeltest instance to the map name such that
    # it gets its own map in Hazelcast; if set to 'false', then this PokedexRunner will share its maps with the
    # maps created by PokedexRunners in other Hazeltest instances
    # Set this to 'false' if you would like to make all Hazeltest instances access the same map or maps
    appendClientIdToMapName: false
    # The number of test loops (e.g., ingest-read-delete) to execute in each map goroutine
    numRuns: 10000
    # Whether to evict all maps for this runner prior to the runner's test loop launching. For example, if 'numMaps' is
    # 10, this property will ensure the maps the runner's test loop will act upon are evicted of all entries before
    # the test loop starts. If the eviction invocation returns an error, a warning will be logged, but execution
    # will commence anyway.
    evictMapsPriorToRun: true
    mapPrefix:
      enabled: true
      # The prefix will be put in front of the map name as-is, so no additional underscores or other
      # characters will be added
      prefix: "ht_"
    # Using sleeps, the pace with which this runner interacts with Hazelcast can be slowed down
    sleeps:
      # Can be enabled to make the test loop sleep for the given duration after each of the <numRuns>. One run
      # consists of three actions (e.g. ingest-read-delete), so use this setting to tell the test loop to sleep
      # after having finished the last action of run <n> and before commencing with the first action of run <n+1>
      betweenRuns:
        enabled: true
        durationMs: 2000
        enableRandomness: true
    testLoop:
      # Can take the value of one of the sub-objects below, i.e. either 'batch' or 'boundary'
      type: boundary
      # The batch test loop offers comparatively simple logic to perform map actions in Hazelcast -- it inserts all
      # elements of the source data, then reads all items thus ingested, removes some of them, and then starts all over.
      # So, it works in batches of operations -- hence its name.
      # The batch test loop is sufficient to maximize both the CPU and memory load on a Hazelcast cluster -- specifically
      # in combination with the LoadRunner, not so much the PokedexRunner --, but it does not offer enough fine-tuning
      # options for the generated load to be very realistic (i.e. the load patterns generated on the target Hazelcast
      # cluster probably won't resemble load a typical client application might induce).
      batch:
        sleeps:
          # If this is enabled, the test loop will wait for <durationMs> (or a duration in the interval [1, <durationMs>]
          # in case randomness has been enabled) after executing one action in a batch (e.g. one insert operation).
          # NOTE(review): other sleep comments in this file state the random interval as [0, <durationMs>] -- confirm
          # which lower bound is correct.
          afterBatchAction:
            enabled: true
            durationMs: 50
            enableRandomness: true
          # Can be enabled to make the batch test loop sleep for the given duration after one batch of actions (e.g.,
          # "ingest x elements") has finished, meaning the next action batch will start only after a sleep of <durationMs>
          betweenActionBatches:
            enabled: true
            durationMs: 5000
            enableRandomness: true
      # The boundary test loop was introduced to offer more sophisticated load generation logic aiming at inducing
      # load realistic enough to resemble the load patterns typical client applications might generate in
      # the day-to-day operation of a production environment. Thus, its goal is to offer the means for simulating
      # typical production traffic such that the Hazelcast cluster under test can be exposed to production-like
      # traffic in order to deliver proof that the cluster's configuration, bundled in whatever sort of release candidate
      # you use (e.g. a Helm chart), is actually fit for production, and to deliver this proof BEFORE the configuration
      # ever saw the light of a production environment.
      # The boundary test loop is so called because it revolves around "fill boundaries" as defined by the number
      # of items in the runner's data source in combination with an upper and a lower fill percentage (i.e., an
      # upper and a lower boundary the runner should respect).
      # For example, given the 151 source elements of the PokedexRunner and assuming an upper fill percentage
      # of 0.8 (80 %) as well as a lower fill percentage of 0.2 (20 %), the runner will execute insert and remove
      # operations so the number of elements present in the target map on the Hazelcast cluster under test will always
      # remain within the boundaries thus defined (here: 151*0.8=121 elements (rounded) as the upper boundary, and
      # 151*0.2=30 (rounded) as the lower). After each state-changing operation (insert or remove), the runner
      # will perform a read operation.
      # Each of the runner's goroutines will only consider its own key-value pairs, so even if the number of
      # maps (which internally translates to the number of goroutines) is set to something greater than 1 and the
      # runner is instructed to not append the map number to the map (such that all map goroutines write to the
      # same map), each of the runner's boundary test loop goroutines will still only work on
      # its own key-value pairs.
      # Iterating up and down between the map fill boundaries is carried out in scope of a so-called operation chain.
      # An operation chain represents the idea that the test loop should move over its source data multiple times in order
      # to have enough possibility to hit both the upper and the lower boundary a couple of times, and perform enough
      # operations of all kinds (insert, read, remove), in between for the load thus generated to be realistic.
      # Take, for example, the PokedexRunner's data source, which is the 151 Pokémon of the first-generation Pokédex.
      # With a chain length of 1510, the test loop will iterate between the upper and the lower boundary precisely ten times
      # assuming an upper boundary of 100 % and a lower boundary of 0 %, and the number of iterations increases when the
      # "boundary window" is configured to be more narrow. For this reason, the chain length should typically be
      # higher than the number of elements in the source data.
      # In order to introduce more randomness to the order of operations, the boundary test loop can be configured
      # with a probability to execute an operation towards the boundary (an "action towards boundary probability").
      # For example, if the test loop has determined it should fill the target map and the action towards boundary
      # probability is configured to be 0.7 (70 %), then it will perform an insert operation with a probability of
      # only 70 %. Hence, even in "fill mode", the action to be performed could be a remove (unless this would
      # cause the lower map fill boundary to be violated, in which case the runner will override the probability and
      # force an insert).
      boundary:
        sleeps:
          # Whether the runner's boundary test loop should sleep after having executed an operation chain.
          betweenOperationChains:
            enabled: true
            durationMs: 5000
            enableRandomness: true
          # Whether the runner's boundary test loop should sleep after having performed one operation within the
          # operation chain, e.g. an insert.
          afterChainAction:
            enabled: true
            durationMs: 500
            enableRandomness: true
          # Whether the runner's boundary test loop should sleep whenever it changed its map fill mode. For example, if
          # the previous map fill mode was 'fill', the upper fill boundary threshold was reached and the test loop
          # now switches to 'drain', this sleep will tell it to wait before moving on.
          # This sleeping behavior is useful for letting metrics collection systems (such as Prometheus or the
          # Hazelcast Management Center itself) catch up and read the current map metrics before the next actions
          # of the test loop begin altering them again. For example, by configuring a long-enough sleep, you can make
          # sure the map fill states at the point of mode change are correctly recorded by the metrics system. (For this
          # reason, randomness is disabled on this sleep by default, but you can enable it if the precision of metrics
          # collection is not important in your use case.)
          uponModeChange:
            enabled: true
            durationMs: 6000
            enableRandomness: false
        # The definition of the test loop's operation chain.
        operationChain:
          # Defines how often the test loop will "bounce" between the upper and the lower map fill boundary.
          # The higher this value, the more often both boundaries will be hit (though the "distance" between the
          # boundaries also influences how many boundary hits will occur in one chain).
          # This value should be greater than the number of elements in the runner's data source.
          # (Written without digit separators on purpose: underscores in integers are a YAML 1.1 feature, and
          # YAML 1.2 core-schema parsers would read such a value as a string.)
          length: 1000
          # The test loop keeps track of the state of the key-value pairs in the target Hazelcast map as well
          # as of the actions it has performed (keeping track of this state is necessary to decide which map
          # action should be performed next, e.g. to avoid violating a map fill boundary). This property controls
          # whether both the local and the remote state should be cleared after having run through one operation chain.
          # (Here, clearing the remote state means the test loop will clear all values in the target map it is
          # responsible for, leaving the values of other test loops of the same runner untouched.)
          # If this is set to "false", then the transition from one operation chain to the next will be seamless,
          # i.e. from looking at the number of elements in the target Hazelcast map, you couldn't tell whether the
          # runner's test loop has begun a new operation chain, as it will neither reset its local nor the remote state
          # in the target Hazelcast map.
          resetAfterChain: true
          # The boundary definition this boundary test loop will use to determine the degree to which it should fill
          # the target Hazelcast map.
          # (You can configure all map goroutines (and hence each test loop) to work on the same Hazelcast map by
          # setting the "appendMapIndexToMapName" property to "false", but since each test loop will only consider
          # its own values, it will still respect those boundaries in relation to the number of items under its
          # responsibility.)
          boundaryDefinition:
            # The upper map fill boundary.
            # Example: Setting this to 0.8 (80 %) on the PokedexRunner's 151 data source elements will result in this
            # test loop not ingesting more than 121 elements.
            upper:
              # The map fill percentage that defines the upper map boundary in relation to the number
              # of elements in the runner's data source.
              mapFillPercentage: 0.8
              # Whether the map fill percentage should be determined randomly before the start of each new
              # operation chain. If set to "true", the random value will be taken from the half-open interval
              # (<upper.mapFillPercentage>, 1.0].
              enableRandomness: true
            # The lower map fill boundary.
            lower:
              # The map fill percentage that defines the lower map boundary in relation to the number
              # of elements in the runner's data source.
              mapFillPercentage: 0.2
              # If set to "true", the random value will be taken from the half-open interval
              # [0.0, <lower.mapFillPercentage>).
              enableRandomness: true
            # The probability with which to execute an action towards the boundary.
            # If the test loop has determined it should currently fill the map, then this setting controls
            # the probability for an insert operation to be performed (here, the insert would be the action
            # towards the boundary because in "fill mode" the test loop aims for the upper boundary). The exception
            # to the rule is when the test loop determines it has reached a boundary, in which case it will enforce
            # an action to move away from it (e.g. a remove operation in case the upper map fill boundary has been
            # reached to not violate this boundary), thus overriding the probability setting.
            # Expressed as a float, like all other probabilities and percentages in this file (1.0 corresponds to 100 %).
            actionTowardsBoundaryProbability: 1.0
  # 'mapTests.load' configures the LoadRunner. Properties not commented here also appear on the PokedexRunner
  # above; NOTE(review): presumably with the same meaning -- confirm.
  load:
    enabled: true
    numMaps: 10
    # In contrast to the PokedexRunner, whose data set is limited by the number of Pokémon contained in the
    # first-generation Pokédex (151), the LoadRunner can create arbitrarily many entries, and the
    # 'numEntriesPerMap' controls how many entries it will create
    numEntriesPerMap: 500
    # The payload for each of the <numEntriesPerMap> entries the LoadRunner creates is a random string, whose size
    # in bytes is controlled by the 'payloadSizeBytes' property (that is to say only one random string is created
    # and will be used as the value in each of the <numEntriesPerMap> key-value pairs in order to reduce the
    # application's memory footprint)
    # NOTE(review): with the defaults in this section (10 maps, 500 entries per map, 10,000,000 bytes per entry),
    # the payload stored on the cluster adds up to roughly 50 GB if every entry is present -- confirm this is
    # the intended default.
    payloadSizeBytes: 10000000
    appendMapIndexToMapName: true
    appendClientIdToMapName: false
    numRuns: 10000
    evictMapsPriorToRun: true
    mapPrefix:
      enabled: true
      prefix: "ht_"
    sleeps:
      betweenRuns:
        enabled: true
        durationMs: 2000
        enableRandomness: true
    testLoop:
      type: boundary
      batch:
        sleeps:
          afterBatchAction:
            enabled: true
            durationMs: 10
            enableRandomness: true
          betweenActionBatches:
            enabled: true
            durationMs: 2000
            enableRandomness: true
      boundary:
        sleeps:
          betweenOperationChains:
            enabled: true
            durationMs: 1000
            enableRandomness: true
          afterChainAction:
            enabled: true
            durationMs: 50
            enableRandomness: false
          uponModeChange:
            enabled: true
            durationMs: 15000
            enableRandomness: false
        operationChain:
          length: 2000
          resetAfterChain: true
          boundaryDefinition:
            upper:
              mapFillPercentage: 0.9
              enableRandomness: true
            lower:
              mapFillPercentage: 0.2
              enableRandomness: true
            actionTowardsBoundaryProbability: 0.9