In [None]:
from __future__ import annotations

import time

import numpy as np

from xclim import sdba
from xclim.testing import open_dataset

# Open the CanESM2 test dataset and keep `tasmax` restricted to the
# 1950-1980 time slice; this is the input of every benchmark below.
ds = open_dataset("sdba/CanESM2_1950-2100.nc")
tx = ds.sel(time=slice("1950", "1980"))["tasmax"]

# Keyword arguments shared by all quantile calls: reduce along `time`
# at 50 evenly spaced quantile levels between 0 and 1.
kws = dict(dim="time", q=np.linspace(0, 1, 50))

## Tests with %%timeit (full 1950–1980 period)

Here, `fastnanquantile` is the fastest of the following algorithms:
* `xr.DataArray.quantile`
* `nbutils.quantile`, using: 
  * `xclim.core.utils.nan_quantile`
  * `fastnanquantile`


In [None]:
%%timeit
# Baseline: the `quantile` method of `tx` itself, called with the shared `kws`.
# `.compute()` is part of the timed statement so that any deferred evaluation
# is included in the measurement.
tx.quantile(**kws).compute()

In [None]:
%%timeit
# Time `sdba.nbutils.quantile` with the module-level fastnanquantile switch off.
# The flag assignment sits inside the timed body, so it is re-executed on
# every timeit iteration together with the quantile call.
sdba.nbutils.USE_FASTNANQUANTILE = False
sdba.nbutils.quantile(tx, **kws).compute()

In [None]:
! pip install fastnanquantile

In [None]:
%%timeit
# Time `sdba.nbutils.quantile` with the module-level fastnanquantile switch on.
# The flag assignment sits inside the timed body, so it is re-executed on
# every timeit iteration together with the quantile call.
sdba.nbutils.USE_FASTNANQUANTILE = True
sdba.nbutils.quantile(tx, **kws).compute()

## Test computation time as a function of number of points

For a smaller number of time steps (<= 2000), `_sortquantile` is generally the best algorithm.

In [None]:
import matplotlib.pyplot as plt
import xarray as xr

# Number of repeated quantile calls per (backend, size) pair; the plotted value
# is the total elapsed time divided by this count.
num_tests = 500

# Legend labels keyed by the value assigned to `sdba.nbutils.USE_FASTNANQUANTILE`.
# As in the %%timeit cells above, True is the fastnanquantile run and False is
# the `xclim.core.utils.nan_quantile` run.
labels = {True: "fastnanquantile", False: "xclim.core.utils.nan_quantile"}

# Maps each flag value to a list of [size, total elapsed seconds] pairs.
timed = {}
# fastnanquantile has nothing to do with sortquantile
# I just added a third step using this variable

for use_fnq in labels:
    # The list must exist before the first append, otherwise a KeyError is raised.
    timed[use_fnq] = []
    sdba.nbutils.USE_FASTNANQUANTILE = use_fnq
    # heat-up the jit
    sdba.nbutils.quantile(xr.DataArray(np.array([0, 1.5])), dim="dim_0", q=np.array([0.5]))
    for size in np.arange(250, 2000 + 250, 250):
        # Benchmark on the first `size` time steps only.
        da = tx.isel(time=slice(0, size))
        # perf_counter is monotonic and high-resolution, which suits interval timing.
        t0 = time.perf_counter()
        for _i in range(num_tests):
            sdba.nbutils.quantile(da, **kws).compute()
        timed[use_fnq].append([size, time.perf_counter() - t0])

fig, ax = plt.subplots()
for use_fnq, lab in labels.items():
    arr = np.array(timed[use_fnq])
    # Column 0 is the number of time steps, column 1 the total time for `num_tests` calls.
    ax.plot(arr[:, 0], arr[:, 1] / num_tests, label=lab)
ax.legend()
ax.set_title("Quantile computation, average time vs array size, for 50 quantiles")
ax.set_xlabel("Number of time steps in the distribution")
ax.set_ylabel("Computation time (s)");