# Various stress tests to see if instance and services response adequately

## Inputs and Configuration

In [1]:
# NBVAL_IGNORE_OUTPUT

import os
import random
import requests
import time
from datetime import datetime 
from inspect import cleandoc
from dataclasses import dataclass
import threading
from threading import Thread


# PAVICS_HOST = os.getenv("PAVICS_HOST", "pavics.ouranos.ca").rstrip("/")
PAVICS_HOST = os.getenv("PAVICS_HOST", "hirondelle.crim.ca").rstrip("/")
if not PAVICS_HOST:
    raise ValueError("Cannot run test without a PAVICS_HOST value.")

PAVICS_URL = f"https://{PAVICS_HOST}"
VERIFY_SSL = True if "DISABLE_VERIFY_SSL" not in os.environ else False
MAGPIE_URL = PAVICS_URL + "/magpie"
TWITCHER_PROXY = "/twitcher/ows/proxy"
TWITCHER_URL = PAVICS_URL + TWITCHER_PROXY
TWITCHER_URL = os.getenv("TWITCHER_URL") or TWITCHER_URL

# test config
TEST_WPS_BIRDS = str(os.getenv("TEST_WPS_BIRDS", "finch,flyingpigeon,raven,hummingbird"))
TEST_WPS_BIRDS = [bird.strip() for bird in TEST_WPS_BIRDS.split(",")]
if not len(TEST_WPS_BIRDS):
    raise ValueError("Cannot run test without at least one service in TEST_WPS_BIRDS.")
TEST_RUNS = int(os.getenv("TEST_RUNS", 100))  # number of requests per tested bird
TEST_MAX_AVG_TIME = int(os.getenv("TEST_MAX_AVG_TIME", 1))     # maximum allowed request seconds on average for success
TEST_MAX_ERR_CODE = int(os.getenv("TEST_MAX_ERR_CODE", 0))     # maximum allowed amount of incorrect request status code
TEST_TIMEOUT_ABORT = int(os.getenv("TEST_TIMEOUT_ABORT", 5))   # maximum timeout duration to wait before abort request
TEST_TIMEOUT_RETRY = int(os.getenv("TEST_ABORT_THRESHOLD", 3)) # maximum request timeout retries before bird is aborted

print(f"PAVICS_HOST:    [{PAVICS_HOST}]")
print(f"TWITCHER_URL:   [{TWITCHER_URL}]")
print(f"TEST_WPS_BIRDS: {TEST_WPS_BIRDS}")

PAVICS_HOST:    [hirondelle.crim.ca]
TWITCHER_URL:   [https://hirondelle.crim.ca/twitcher/ows/proxy]
TEST_WPS_BIRDS: ['finch', 'flyingpigeon', 'raven', 'hummingbird']


## Utilities

In [2]:
@dataclass
class StressTestResult:
    code: int = 200
    runs: int = 0
    max_avg_time: float = 0
    max_err_code: int = 0
    timeout_abort: int = 0
    timeout_retry: int = 0
    timeout_count: int = 0
    method: str = "GET"
    url: str = None
    request_args: dict = None
    status: int = 0  # see description of stress-test
    codes = []
    delta = []
    times = []
    
    
    @property
    def avg_time(self):
        return sum(self.times) / self.runs

    @property
    def min_time(self):
        return min(self.times)

    @property
    def max_time(self):
        return max(self.times)

    @property
    def sum_err_code(self):
        return sum([code != self.code for code in self.codes])

    def __str__(self):
        columns = ["Run", "Codes", "Delta", "Times", "Timestamps"]
        idx = len(str(self.runs))
        r = max(len(columns[0]), idx)
        w = 10
        header = "".join(f"{c:>{w if i else r}}" for i, c in enumerate(columns))
        offset = 16  # spaces offset of result lines, relative to this file
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        data = [f"{i+1:>{r+(offset if i else 0)}}"
                f"{('(!) ' if c != self.code else '(x) ' if self.code == 408 else '') + str(c):>{w}}"
                f"{d:>{w-1}.3f}s"
                f"{t:>{w-1}.3f}s"
                f"{timestamp if c != self.code else '':>{w}}"
                for i, (c, d, t)
                in enumerate(zip(self.codes, self.delta, self.times))]
        lines = "\n".join(data)
        summary = "Undefined failure result status condition encountered."
        if self.status == 0:
            summary = [
                "All passing conditions have been achieved.",
            ]
        elif self.status == -1:
            summary = [
                f"Detected {self.sum_err_code} erroneous HTTP codes not equal to expected {self.code}."
            ]
        elif self.status == -2:
            summary = [
                f"Detected regression with long request time.",
                f"Expected max-avg-time: ({self.max_avg_time:.3f}s <= {self.max_time:.3f}s)."
            ]
        elif self.status == -3:
            summary = [
                f"Maximum number of timeout ({self.timeout_abort}s) requests exceeded ({self.timeout_count}).",
                "Test was aborted to avoid further delays."
            ]
        summary.append(f"Test {'succeeded' if self.status == 0 else 'failed'} (status={self.status}).")
        summary = ("\n" + offset * " ").join(summary)
        return cleandoc(f"""
        Stress Test:
            Test:
                code: {self.code}
                runs: {self.runs}
                max-avg-time:  {self.max_avg_time}s
                max-err-code:  {self.max_err_code}
                sum-err-code:  {self.sum_err_code}
                timeout-abort: {self.timeout_abort}s
                timeout-retry: {self.timeout_retry}
                timeout-count: {self.timeout_count}
            Request:
                method: {self.method}
                url:    {self.url}
                args:   {self.request_args}
            Times:
                min: {self.min_time:.3f}s
                avg: {self.avg_time:.3f}s
                max: {self.max_time:.3f}s
            Results:
                {header}
                {lines}
            Summary:
                {summary}
        """)


def stress_test_requests(progression_dict:dict, url: str, method="GET", runs=100, code=200, delays=True,
                         max_err_code=0, max_avg_time=None,
                         abort_timeout=5, abort_retries=3, **req_kwargs) -> StressTestResult:
    """
    Executes the request for the number of demanded runs and validates the expected status is always returned.

    Outputs the results of each request and a summary of their execution time.
    If requested, also validates that all responses were returned on average faster than the maximum allowed time.

    :param url: endpoint to stress test
    :param method: HTTP method for request
    :param runs: number of stress test request
    :param code: expected HTTP code from requests
    :param delays: whether to apply small random delays between requests
       Otherwise, sequential requests are executed as quickly as possible, when the previous response is obtained.
    :param max_err_code: maximum amount of erroneous HTTP status code allowed to consider the test successful.
    :param max_avg_time: maximum average time of requests permitted to consider the test successful.
    :param abort_timeout: duration to wait until a request is aborted, sets 408 (Read Timeout) as HTTP status code.
    :param abort_retries: number of failed timeout requests allowed before abort of whole stress test for this endpoint.
    :returns:
        StressTestResult with individual request results and one of below status:
        -  0 (success) for no error and all conditions achieved
        - -1 (failure) for maximum amount of HTT error code reached
        - -2 (failure) for maximum request time on average reached
        - -3 (failure) for aborted test due to too many timeout
    """
    thread_name = threading.current_thread().name
    print(f"\n Thread({thread_name}) \n Stress Test with [{runs}] calls to [{url}]")
    req_kwargs.pop("timeout", None)
    result = StressTestResult()
    result.runs = runs
    result.url = url
    result.method = method
    result.request_args = req_kwargs
    result.max_err_code = max_err_code
    result.max_avg_time = max_avg_time
    result.abort_timeout = abort_timeout
    result.abort_retries = abort_retries
    result.codes = []
    result.times = []
    result.datetimes = []
    result.delta = [0.] + [float((random.randint(1, 100) / 1000) if delays else 0) for _ in range(1, runs)]

    char = len(str(runs))
    for i in range(runs):
        progress_update = 50
        if not i % progress_update:
            increase_progress(progression_dict, count=progress_update)
        start = time.perf_counter()
        try:
            resp = requests.request(method, url, timeout=abort_timeout, **req_kwargs)
        except requests.exceptions.Timeout:
            result.times.append(abort_timeout)
            result.codes.append(408)  # read timeout
            result.timeout_count += 1
        else:
            result.times.append(time.perf_counter() - start)
            result.codes.append(resp.status_code)
        if resp.status_code == 500:
            print(resp.text)
        if result.timeout_count > abort_timeout:
            result.status = -3
            print(f"Aborted: Too Many Timeout ({result.timeout_count})")
            return result
        if i == runs:
            break
        if result.delta[i]:
            time.sleep(result.delta[i])
#     print(f"Progress: {runs:>{char}}/{runs}")
    if max_avg_time and result.avg_time > max_avg_time:
        result.status = -2
    elif len([c for c in result.codes if c == code]) >= (runs - max_err_code):
        result.status = 0
    else:
        result.status = -1
    return result


In [4]:
def new_progression_dict(n_threads:int, call_per_threads: int):
    return {"count" : 0, "total" : call_per_threads * n_threads}

def increase_progress(progression_dict, count):
    thread_name = threading.current_thread().name
    progression_dict['count'] = progression_dict['count'] + count
    if not progression_dict['count']%50:
        print(f'Progress : {progression_dict["count"]}/{progression_dict["total"]}     Thread_id : {thread_name}')

## Tests

In [12]:
# NBVAL_IGNORE_OUTPUT

def test_wps_eachBird(progression_dict):
    print()
    failed_count = 0
    failed_results = ""
    for bird in TEST_WPS_BIRDS:
        bird_url = f"{TWITCHER_URL}/{bird}/wps?service=wps&request=getcapabilities"
        expect_status_code = 200
        results = stress_test_requests(progression_dict,bird_url, runs=TEST_RUNS, code=expect_status_code,
                                       max_err_code=TEST_MAX_ERR_CODE, max_avg_time=TEST_MAX_AVG_TIME,
                                       abort_retries=TEST_TIMEOUT_RETRY, abort_timeout=TEST_TIMEOUT_ABORT)
        print(results)
        if results.status != 0:
            failed_count += 1
            failed_results = f"{failed_results}\n{results}"
            
    thread_name = threading.current_thread().name
    assert failed_count == 0, f"Failed {failed_count} tests.  Failed results: {failed_results}"
    print(f"\nThread :{thread_name}, All tests passed!")
    
    
def test_tredds(progression_dict):
    print()
    failed_count = 0
    failed_results = ""
    bird_url = f"{TWITCHER_URL}/thredds/catalog/birdhouse/testdata/catalog.html?dataset=birdhouse/testdata/ta_Amon_MRI-CGCM3_decadal1980_r1i1p1_199101-200012.nc"
    expect_status_code = 200
    results = stress_test_requests(progression_dict,bird_url, runs=TEST_RUNS, code=expect_status_code,
                                   max_err_code=TEST_MAX_ERR_CODE, max_avg_time=TEST_MAX_AVG_TIME,
                                   abort_retries=TEST_TIMEOUT_RETRY, abort_timeout=TEST_TIMEOUT_ABORT)
    
    print(results)
    thread_name = threading.current_thread().name
    if results.status != 0:
        failed_count += 1
        failed_results = f"{failed_results}\n{results}"
    assert failed_count == 0, f"Failed {failed_count} tests.  Failed results: {failed_results}"
    print(f"\nThread :{thread_name},  All tests passed!")
    

In [6]:
def run_test(target_fonction, n_threads:int, call_per_threads:int):
    TEST_PROGRESS = new_progression_dict(n_threads, call_per_threads)
    threads_list = []
    for i in range(n_threads):
        threads_list.append(Thread(target = target_fonction, args=(TEST_PROGRESS, ), name=f"T{i}"))
    
    for t in threads_list:
        t.start()

    for t in threads_list:
        t.join()

In [7]:
# Running test on wps on each bird
run_test(test_wps_eachBird, n_threads = 3, call_per_threads = 100*len(TEST_WPS_BIRDS))



 Thread(T0) 
 Stress Test with [100] calls to [https://hirondelle.crim.ca/twitcher/ows/proxy/finch/wps?service=wps&request=getcapabilities]
Progress : 50/1200     Thread_id : T0


 Thread(T1) 
 Stress Test with [100] calls to [https://hirondelle.crim.ca/twitcher/ows/proxy/finch/wps?service=wps&request=getcapabilities]
Progress : 100/1200     Thread_id : T1


 Thread(T2) 
 Stress Test with [100] calls to [https://hirondelle.crim.ca/twitcher/ows/proxy/finch/wps?service=wps&request=getcapabilities]
Progress : 150/1200     Thread_id : T2
Progress : 200/1200     Thread_id : T0
Progress : 250/1200     Thread_id : T2
Progress : 300/1200     Thread_id : T1

 Thread(T0) 
 Stress Test with [100] calls to [https://hirondelle.crim.ca/twitcher/ows/proxy/flyingpigeon/wps?service=wps&request=getcapabilities]
Progress : 350/1200     Thread_id : T0

 Thread(T2) 
 Stress Test with [100] calls to [https://hirondelle.crim.ca/twitcher/ows/proxy/flyingpigeon/wps?service=wps&request=getcapabilities]
Progre

In [8]:
# Running test on thredds
for i in range(2):
    run_test(test_tredds, n_threads = 5, call_per_threads = 100)
    time.sleep(1)



 Thread(T0) 
 Stress Test with [100] calls to [https://hirondelle.crim.ca/twitcher/ows/proxy/thredds/catalog/birdhouse/testdata/catalog.html?dataset=birdhouse/testdata/ta_Amon_MRI-CGCM3_decadal1980_r1i1p1_199101-200012.nc]
Progress : 50/500     Thread_id : T0


 Thread(T1) 
 Stress Test with [100] calls to [https://hirondelle.crim.ca/twitcher/ows/proxy/thredds/catalog/birdhouse/testdata/catalog.html?dataset=birdhouse/testdata/ta_Amon_MRI-CGCM3_decadal1980_r1i1p1_199101-200012.nc]
Progress : 100/500     Thread_id : T1

 Thread(T2) 
 Stress Test with [100] calls to [https://hirondelle.crim.ca/twitcher/ows/proxy/thredds/catalog/birdhouse/testdata/catalog.html?dataset=birdhouse/testdata/ta_Amon_MRI-CGCM3_decadal1980_r1i1p1_199101-200012.nc]
Progress : 150/500     Thread_id : T2


 Thread(T3) 
 Stress Test with [100] calls to [https://hirondelle.crim.ca/twitcher/ows/proxy/thredds/catalog/birdhouse/testdata/catalog.html?dataset=birdhouse/testdata/ta_Amon_MRI-CGCM3_decadal1980_r1i1p1_199101

Exception in thread T0:
Traceback (most recent call last):
  File "/usr/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.8/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-5-61a7846a36ad>", line 38, in test_tredds
AssertionError: Failed 1 tests.  Failed results: 
Stress Test:
    Test:
        code: 200
        runs: 100
        max-avg-time:  1s
        max-err-code:  0
        sum-err-code:  1
        timeout-abort: 0s
        timeout-retry: 0
        timeout-count: 0
    Request:
        method: GET
        url:    https://hirondelle.crim.ca/twitcher/ows/proxy/thredds/catalog/birdhouse/testdata/catalog.html?dataset=birdhouse/testdata/ta_Amon_MRI-CGCM3_decadal1980_r1i1p1_199101-200012.nc
        args:   {}
    Times:
        min: 0.035s
        avg: 0.063s
        max: 0.284s
    Results:
        Run     Codes     Delta     TimesTimestamps
          1   (!) 500    0.000s    0.147s202


Thread :T2 : All tests passed!


Exception in thread T4:
Traceback (most recent call last):
  File "/usr/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.8/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-5-61a7846a36ad>", line 38, in test_tredds
AssertionError: Failed 1 tests.  Failed results: 
Stress Test:
    Test:
        code: 200
        runs: 100
        max-avg-time:  1s
        max-err-code:  0
        sum-err-code:  2
        timeout-abort: 0s
        timeout-retry: 0
        timeout-count: 0
    Request:
        method: GET
        url:    https://hirondelle.crim.ca/twitcher/ows/proxy/thredds/catalog/birdhouse/testdata/catalog.html?dataset=birdhouse/testdata/ta_Amon_MRI-CGCM3_decadal1980_r1i1p1_199101-200012.nc
        args:   {}
    Times:
        min: 0.034s
        avg: 0.062s
        max: 0.171s
    Results:
        Run     Codes     Delta     TimesTimestamps
          1       200    0.000s    0.110s   


Thread :T1 : All tests passed!

Thread :T3 : All tests passed!


 Thread(T0) 
 Stress Test with [100] calls to [https://hirondelle.crim.ca/twitcher/ows/proxy/thredds/catalog/birdhouse/testdata/catalog.html?dataset=birdhouse/testdata/ta_Amon_MRI-CGCM3_decadal1980_r1i1p1_199101-200012.nc]
Progress : 50/500     Thread_id : T0


 Thread(T1) 
 Stress Test with [100] calls to [https://hirondelle.crim.ca/twitcher/ows/proxy/thredds/catalog/birdhouse/testdata/catalog.html?dataset=birdhouse/testdata/ta_Amon_MRI-CGCM3_decadal1980_r1i1p1_199101-200012.nc]
Progress : 100/500     Thread_id : T1


 Thread(T2) 
 Stress Test with [100] calls to [https://hirondelle.crim.ca/twitcher/ows/proxy/thredds/catalog/birdhouse/testdata/catalog.html?dataset=birdhouse/testdata/ta_Amon_MRI-CGCM3_decadal1980_r1i1p1_199101-200012.nc]
Progress : 150/500     Thread_id : T2


 Thread(T3) 
 Stress Test with [100] calls to [https://hirondelle.crim.ca/twitcher/ows/proxy/thredds/catalog/birdhouse/testdata/catalog.html?datas