# Various stress tests to see if the instance and its services respond adequately

## Inputs and Configuration

In [3]:
# NBVAL_IGNORE_OUTPUT

import os
import random
import time
from dataclasses import dataclass, field
from inspect import cleandoc

import requests

# Host name of the instance under test; a trailing slash is dropped so URLs can be built by concatenation.
PAVICS_HOST = os.getenv("PAVICS_HOST", "pavics.ouranos.ca").rstrip("/")
if not PAVICS_HOST:
    raise ValueError("Cannot run test without a PAVICS_HOST value.")

PAVICS_URL = f"https://{PAVICS_HOST}"
# SSL verification stays enabled unless DISABLE_VERIFY_SSL is defined (whatever its value).
VERIFY_SSL = "DISABLE_VERIFY_SSL" not in os.environ
MAGPIE_URL = PAVICS_URL + "/magpie"
TWITCHER_PROXY = "/twitcher/ows/proxy"
TWITCHER_URL = os.getenv("TWITCHER_URL", PAVICS_URL + TWITCHER_PROXY)

# test config
TEST_BIRDS = str(os.getenv("TEST_BIRDS", "finch,flyingpigeon,raven"))
# Blank entries (empty variable, doubled or trailing commas) are dropped: "".split(",") yields [""],
# which would otherwise slip past the emptiness check below and produce an invalid service URL.
TEST_BIRDS = [bird.strip() for bird in TEST_BIRDS.split(",") if bird.strip()]
if not TEST_BIRDS:
    raise ValueError("Cannot run test without at least one service in TEST_BIRDS.")
TEST_RUNS = int(os.getenv("TEST_RUNS", 100))  # number of requests per tested bird
TEST_MAX_AVG_TIME = int(os.getenv("TEST_MAX_AVG_TIME", 1))     # maximum allowed request seconds on average for success
TEST_MAX_ERR_CODE = int(os.getenv("TEST_MAX_ERR_CODE", 0))     # maximum allowed amount of incorrect request status code
TEST_TIMEOUT_ABORT = int(os.getenv("TEST_TIMEOUT_ABORT", 5))   # maximum timeout duration to wait before abort request
# maximum request timeout retries before bird is aborted
# TEST_TIMEOUT_RETRY matches the naming of the other settings; the previously used
# TEST_ABORT_THRESHOLD variable is still honoured as a fallback.
TEST_TIMEOUT_RETRY = int(os.getenv("TEST_TIMEOUT_RETRY", os.getenv("TEST_ABORT_THRESHOLD", 3)))

print(f"PAVICS_HOST:  [{PAVICS_HOST}]")
print(f"TWITCHER_URL: [{TWITCHER_URL}]")
print(f"TEST_BIRDS:   {TEST_BIRDS}")

PAVICS_HOST:  [localhost]
TWITCHER_URL: [http://localhost:8001/ows/proxy]
TEST_BIRDS:   ['malleefowl']


## Utilities

In [4]:
@dataclass
class StressTestResult:
    """
    Configuration, raw measurements and outcome of one stress test.

    ``codes``, ``delta`` and ``times`` hold one entry per request; they are matched by position when
    the report is rendered with ``str()``.
    """
    code: int = 200              # HTTP status code every request is expected to return
    runs: int = 0
    max_avg_time: float = 0
    max_err_code: int = 0
    timeout_abort: int = 0
    timeout_retry: int = 0
    timeout_count: int = 0
    method: str = "GET"
    url: str = None
    request_args: dict = None
    status: int = 0  # see description of stress-test
    # Real dataclass fields with a factory: each instance owns its lists instead of sharing
    # class-level ones with every other result.
    codes: list = field(default_factory=list)
    delta: list = field(default_factory=list)
    times: list = field(default_factory=list)

    @property
    def avg_time(self):
        """Average of the recorded request times, or 0.0 when nothing was recorded."""
        # Divide by the number of measurements rather than by ``runs``: fewer requests than
        # ``runs`` may have been recorded, and ``runs`` may be 0.
        if not self.times:
            return 0.0
        return sum(self.times) / len(self.times)

    @property
    def min_time(self):
        """Shortest recorded request time, or 0.0 when nothing was recorded."""
        return min(self.times, default=0.0)

    @property
    def max_time(self):
        """Longest recorded request time, or 0.0 when nothing was recorded."""
        return max(self.times, default=0.0)

    @property
    def sum_err_code(self):
        """Number of recorded status codes that differ from the expected ``code``."""
        return sum(code != self.code for code in self.codes)

    def __str__(self):
        """Render the test parameters, the per-request table and the result summary as text."""
        columns = ["Run", "Codes", "Delta", "Times"]
        r = max(len(columns[0]), len(str(self.runs)))  # width of the run-number column
        w = 10                                         # width of every other column
        header = "".join(f"{c:>{w if i else r}}" for i, c in enumerate(columns))
        rows = []
        for i, (c, d, t) in enumerate(zip(self.codes, self.delta, self.times), start=1):
            # '(!)' flags a code different from the expected one,
            # '(x)' flags a matching code when the expected code is itself 408.
            marker = "(!) " if c != self.code else "(x) " if self.code == 408 else ""
            rows.append(f"{i:>{r}}{marker + str(c):>{w}}{d:>{w - 1}.3f}s{t:>{w - 1}.3f}s")

        # The summary always describes this instance, never a module-level variable.
        if self.status == 0:
            summary = ["All passing conditions have been achieved."]
        elif self.status == -1:
            summary = [
                f"Detected {self.sum_err_code} erroneous HTTP codes not equal to expected {self.code}."
            ]
        elif self.status == -2:
            summary = [
                "Detected regression with long request time.",
                f"Expected avg-time <= max-avg-time, but got ({self.avg_time:.3f}s > {self.max_avg_time:.3f}s)."
            ]
        elif self.status == -3:
            summary = [
                f"Aborted stress test due to too many request timeouts ({self.timeout_count})."
            ]
        else:
            summary = ["Undefined failure result status condition encountered."]
        summary.append(f"Test {'succeeded' if self.status == 0 else 'failed'}.")

        sections = [
            ("Test", [
                f"code: {self.code}",
                f"runs: {self.runs}",
                f"max-avg-time:  {self.max_avg_time}s",
                f"max-err-code:  {self.max_err_code}",
                f"sum-err-code:  {self.sum_err_code}",
                f"timeout-abort: {self.timeout_abort}s",
                f"timeout-retry: {self.timeout_retry}",
                f"timeout-count: {self.timeout_count}",
            ]),
            ("Request", [
                f"method: {self.method}",
                f"url:    {self.url}",
                f"args:   {self.request_args}",
            ]),
            ("Times", [
                f"min: {self.min_time:.3f}s",
                f"avg: {self.avg_time:.3f}s",
                f"max: {self.max_time:.3f}s",
            ]),
            ("Results", [header] + rows),
            ("Summary", summary),
        ]
        # Indentation is applied explicitly per line so that multi-line sections (table rows,
        # summary) stay aligned whatever their length.
        out = ["Stress Test:"]
        for title, entries in sections:
            out.append(f"    {title}:")
            out.extend(f"        {entry}" for entry in entries)
        return "\n".join(out)


def stress_test_requests(url: str, method="GET", runs=100, code=200, delays=True,
                         max_err_code=0, max_avg_time=None,
                         abort_timeout=5, abort_retries=3, **req_kwargs) -> StressTestResult:
    """
    Executes the request for the number of demanded runs and validates the expected status is always returned.

    Outputs the results of each request and a summary of their execution time.
    If requested, also validates that all responses were returned on average faster than the maximum allowed time.

    :param url: endpoint to stress test
    :param method: HTTP method for request
    :param runs: number of stress test request
    :param code: expected HTTP code from requests
    :param delays: whether to apply small random delays between requests
       Otherwise, sequential requests are executed as quickly as possible, when the previous response is obtained.
    :param max_err_code: maximum amount of erroneous HTTP status code allowed to consider the test successful.
    :param max_avg_time: maximum average time of requests permitted to consider the test successful.
    :param abort_timeout: duration to wait until a request is aborted, sets 408 (Read Timeout) as HTTP status code.
    :param abort_retries: number of failed timeout requests allowed before abort of whole stress test for this endpoint.
    :param req_kwargs: additional keyword arguments forwarded to ``requests.request``.
        Any ``timeout`` provided here is discarded in favour of ``abort_timeout``.
    :returns:
        StressTestResult with individual request results and one of below status:
        -  0 (success) for no error and all conditions achieved
        - -1 (failure) for maximum amount of HTTP error code reached
        - -2 (failure) for maximum request time on average reached
        - -3 (failure) for aborted test due to too many timeout
    """
    print(f"\nStress Test with [{runs}] calls to [{url}]")
    req_kwargs.pop("timeout", None)
    result = StressTestResult()
    result.code = code  # the report compares the obtained codes against this expected value
    result.runs = runs
    result.url = url
    result.method = method
    result.request_args = req_kwargs
    result.max_err_code = max_err_code
    result.max_avg_time = max_avg_time
    # field names of StressTestResult are 'timeout_*', not 'abort_*'
    result.timeout_abort = abort_timeout
    result.timeout_retry = abort_retries
    result.codes = []
    result.times = []
    # delta[i] is the pause applied before request i; the first request is never delayed
    result.delta = [0.] + [float((random.randint(1, 100) / 1000) if delays else 0) for _ in range(1, runs)]

    char = len(str(runs))
    for i in range(runs):
        if not i % 10:
            print(f"Progress: {i:>{char}}/{runs}")
        # Sleeping before the request (instead of after it) keeps the delay strictly between
        # requests: none before the first one and none after the last one.
        if result.delta[i]:
            time.sleep(result.delta[i])
        start = time.perf_counter()
        try:
            resp = requests.request(method, url, timeout=abort_timeout, **req_kwargs)
        except requests.exceptions.Timeout:
            result.times.append(abort_timeout)
            result.codes.append(408)  # read timeout
            result.timeout_count += 1
        else:
            result.times.append(time.perf_counter() - start)
            result.codes.append(resp.status_code)
        # The abort threshold is the number of allowed timeouts, not the timeout duration.
        if result.timeout_count > abort_retries:
            result.status = -3
            print(f"Aborted: Too Many Timeout ({result.timeout_count})")
            return result
    print(f"Progress: {runs:>{char}}/{runs}")
    if max_avg_time and result.avg_time > max_avg_time:
        result.status = -2
    elif result.codes.count(code) >= (runs - max_err_code):
        result.status = 0
    else:
        result.status = -1
    return result


## Tests

In [None]:
# NBVAL_IGNORE_OUTPUT


# Run the stress test against the WPS GetCapabilities endpoint of every configured bird
# and collect the resulting status of each one.
test_statuses = []
for bird in TEST_BIRDS:
    bird_url = f"{TWITCHER_URL}/{bird}/wps?service=wps&request=getcapabilities"
    expect_status_code = 200
    # 'verify' is forwarded to the underlying request so that DISABLE_VERIFY_SSL is honoured
    results = stress_test_requests(bird_url, runs=TEST_RUNS, code=expect_status_code,
                                   max_err_code=TEST_MAX_ERR_CODE, max_avg_time=TEST_MAX_AVG_TIME,
                                   abort_retries=TEST_TIMEOUT_RETRY, abort_timeout=TEST_TIMEOUT_ABORT,
                                   verify=VERIFY_SSL)
    test_statuses.append(results.status)
    print(results)
# Statuses are 0 on success and negative on failure: count the failures instead of summing
# the raw values, which would report a meaningless negative number.
failed_tests = sum(status != 0 for status in test_statuses)
assert not failed_tests, f"Failed {failed_tests} tests."
print("All tests passed!")