# Twitcher performance test

Notebook to verify that the three ways to access Thredds (behind Nginx and Twitcher, behind Nginx only, and directly) have roughly the same latency.

Reproduces and tests the issue reported in https://github.com/bird-house/twitcher/issues/97

Also used to validate the fix in https://github.com/bird-house/birdhouse-deploy/commit/84df6c944b844814ca55e3db488473cd8fcf09d7

In [11]:
import xarray as xr
import time

def extract(url, output_path='test.nc'):
    """Open the dataset at ``url`` and write a small spatial subset to NetCDF.

    The dataset is opened lazily with dask chunks, the variables ``ts`` and
    ``time_vectors`` are dropped, and the first 10 ``lon`` by first 10 ``lat``
    grid points are written to ``output_path``.

    Parameters
    ----------
    url : str
        Location passed to ``xr.open_dataset`` (the notebook passes OPeNDAP
        URLs).
    output_path : str, optional
        Destination NetCDF file. Defaults to ``'test.nc'``, the path that was
        previously hard-coded.

    Notes
    -----
    Prints ``"got ds"`` once the dataset has been opened, before any data is
    written.
    """
    # The context manager closes the dataset (and its underlying remote/file
    # handle) even if the write fails, so repeated timed calls do not leak
    # open connections.
    with xr.open_dataset(url, drop_variables=['ts', 'time_vectors'],
                         chunks={'time': 256, 'lat': 16, 'lon': 16}) as ds:
        print("got ds")
        subset = ds.isel(lon=slice(0, 10), lat=slice(0, 10))
        # compute=True forces the write to happen here, inside the timed call.
        subset.to_netcdf(output_path, compute=True)
    
# Host that every request in this notebook is sent to.
# Alternative host kept for reference: 'https://pavics.ouranos.ca'
testhost = "http://host-140-8.rdext.crim.ca"

# Dataset path appended to each Thredds base URL.
testfile = (
    "testdata/"
    "tasmax_day_BCCAQv2+ANUSPLIN300_BNU-ESM_historical+rcp85_r1i1p1_1_00101-21001231.nc"
)

# Maximum tolerated Twitcher overhead, as a fraction (compared as a percentage
# in the final check).
TWITCHER_LATENCY_THRESHOLD = 0.3

# Number of times each access path is requested before averaging.
REQUEST_REPLIES = 3

### Thredds behind Nginx and Twitcher

In [14]:
# %%time
# NBVAL_IGNORE_OUTPUT

# Average duration of REQUEST_REPLIES extractions through the Nginx + Twitcher
# route. The URL does not change between iterations, so it is built once,
# outside the timed region.
baseurl = f"{testhost}/twitcher/ows/proxy/thredds/dodsC/birdhouse/{testfile}"
time_twitcher = 0

for i in range(REQUEST_REPLIES):
    # perf_counter() is monotonic and high resolution; time.time() can jump
    # when the system clock is adjusted, which would corrupt the measurement.
    starttime_twitcher = time.perf_counter()
    extract(baseurl)
    endtime_twitcher = time.perf_counter()
    iter_time = endtime_twitcher - starttime_twitcher
    print(f"iteration #{i+1}: {iter_time}")
    time_twitcher += iter_time

# Convert the accumulated total into the per-request average.
time_twitcher = time_twitcher / REQUEST_REPLIES
print(f"avg time: {time_twitcher}")

got ds
iteration #1: 23.291869163513184
got ds
iteration #2: 19.063221216201782
got ds
iteration #3: 21.24903392791748
avg time: 21.201374769210815


### Thredds behind Nginx

In [16]:
# %%time
# NBVAL_IGNORE_OUTPUT

# Average duration of REQUEST_REPLIES extractions through the Nginx-only
# route. The URL does not change between iterations, so it is built once,
# outside the timed region.
baseurl = f"{testhost}/testthredds/dodsC/testdatasets/{testfile}"
time_nginx = 0

for i in range(REQUEST_REPLIES):
    # perf_counter() is monotonic and high resolution; time.time() can jump
    # when the system clock is adjusted, which would corrupt the measurement.
    starttime_nginx = time.perf_counter()
    extract(baseurl)
    endtime_nginx = time.perf_counter()
    iter_time = endtime_nginx - starttime_nginx
    print(f"iteration #{i+1}: {iter_time}")
    time_nginx += iter_time

# Convert the accumulated total into the per-request average.
time_nginx = time_nginx / REQUEST_REPLIES
print(f"avg time: {time_nginx}")

got ds
iteration #1: 9.71524977684021
got ds
iteration #2: 8.379936218261719
got ds
iteration #3: 7.967864036560059
avg time: 8.687683343887329


### Thredds directly

In [10]:
# %%time
# NBVAL_IGNORE_OUTPUT

# Average duration of REQUEST_REPLIES extractions sent straight to port 8083.
# NOTE(review): the path still carries the /twitcher/ows/proxy/thredds prefix;
# presumably Thredds itself is served under that context path - confirm.
# The URL does not change between iterations, so it is built once, outside the
# timed region.
baseurl = f"{testhost}:8083/twitcher/ows/proxy/thredds/dodsC/birdhouse/{testfile}"
time_direct = 0

for i in range(REQUEST_REPLIES):
    # perf_counter() is monotonic and high resolution; time.time() can jump
    # when the system clock is adjusted, which would corrupt the measurement.
    starttime_direct = time.perf_counter()
    extract(baseurl)
    endtime_direct = time.perf_counter()
    iter_time = endtime_direct - starttime_direct
    print(f"iteration #{i+1}: {iter_time}")
    time_direct += iter_time

# Convert the accumulated total into the per-request average.
time_direct = time_direct / REQUEST_REPLIES
print(f"avg time: {time_direct}")

got ds
iteration #1: 7.8373799324035645
got ds
iteration #2: 7.59890341758728
got ds
iteration #3: 7.575238466262817
avg time: 7.670507272084554


## Test result

In [38]:
# NBVAL_IGNORE_OUTPUT

# Slowdown of the Twitcher route relative to the Nginx-only route, expressed
# as a whole-number percentage (int() truncates toward zero).
latency_ratio = time_twitcher / time_nginx
twitcher_overhead = int(latency_ratio * 100 - 100)
print(f"Twitcher overhead: {twitcher_overhead}%")

Twitcher overhead: -5%


In [39]:
# The threshold is stored as a fraction, so scale it to a percentage before
# comparing it with the measured overhead.
max_overhead_percent = TWITCHER_LATENCY_THRESHOLD * 100
print("success" if twitcher_overhead <= max_overhead_percent else "failure")

success
