In [3]:
from __future__ import annotations

from pathlib import Path

import numpy as np

import xclim
from xclim import sdba
from xclim.testing import open_dataset

# Fetch the xclim test dataset, reusing the local cache when present.
# pathlib never expands "~" by itself, so expand it here instead of relying
# on the callee to do so.
ds = open_dataset(
    "sdba/CanESM2_1950-2100.nc",
    cache=True,
    cache_dir=Path("~/data/xclim/").expanduser(),
)
# Benchmark input: tasmax for 1950-1980 at the first location.
tx = ds.tasmax.sel(time=slice("1950", "1980")).isel(location=0)
# 50 evenly spaced quantile levels spanning [0, 1].
q = np.linspace(0, 1, 50)
arr = tx.values
# One independent copy per implementation under test.
arr_m1 = arr.copy()
arr_m2 = arr.copy()

# Warm-up: call both numba helpers once so that JIT compilation happens here
# and not inside the timing cells. Fresh copies are passed so arr_m1/arr_m2
# are left untouched. The displayed value is the summed squared difference
# between the two implementations' results.
np.square(
    sdba.nbutils._sortquantile(np.copy(arr_m1), q)
    - sdba.nbutils._nan_quantile(np.copy(arr_m2), q)
).sum()

4.2005265481208437e-26

In [4]:
# Print the numba dispatcher's compiled signatures for `_nan_quantile`; a
# CompileResult entry in the output means the function has been JIT-compiled
# for the listed argument types.
sdba.nbutils._nan_quantile.dump()

DUMP CPUDispatcher[_nan_quantile, type code=2691]
  DUMP CompileResult <built-in method _nan_quantile of _dynfunc._Closure object at 0x7f70b1b28a00>
    DUMP Signature [type code: 3191]
      Argument types:
      | DUMP Array[code=2697, name=array(float32, 1d, C)]
      | DUMP Array[code=1966, name=array(float64, 1d, C)]
      | DUMP Omitted[code=2902, name=omitted(default=1.0)]
      | DUMP Omitted[code=2902, name=omitted(default=1.0)]
      Return type:
      | DUMP Array[code=1966, name=array(float64, 1d, C)]
    END DUMP
  END DUMP
END DUMP CPUDispatcher[_nan_quantile]


In [5]:
%%timeit
# sortquantile implementation
# Times `_sortquantile` on arr_m1 for the quantile levels in q.
# NOTE(review): arr_m1 is passed without a copy; if `_sortquantile` reorders
# its argument in place, every loop after the first sees already-ordered
# data — confirm.
sdba.nbutils._sortquantile(arr_m1, q)

877 µs ± 10.9 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [6]:
%%timeit arr_m3 = np.copy(arr_m2)
# 1d numba-compiled version of xclim.core.utils._nan_quantile (cheating)
# The copy on the magic line is %%timeit setup code: it is executed but not
# timed, and the timed call below reuses that same arr_m3 on every loop.
# NOTE(review): "cheating" presumably means `_nan_quantile` reorders arr_m3
# in place, so only the first loop works on unordered data — confirm.
sdba.nbutils._nan_quantile(arr_m3, q)

156 µs ± 1.41 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [7]:
%%timeit
# 1d numba-compiled version of xclim.core.utils._nan_quantile
# A fresh copy of arr_m2 is made inside the timed statement, so every loop
# receives unmodified data and the cost of the copy is part of the reported
# time.
sdba.nbutils._nan_quantile(np.copy(arr_m2), q)

901 µs ± 35.7 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [8]:
%%timeit
# Abel's implementation
# Same protocol as the numba cell that copies inside the timed statement:
# each loop gets a fresh copy of arr_m2, and the copy cost is included in
# the reported time.
xclim.core.utils._nan_quantile(np.copy(arr_m2), q)

858 µs ± 8.82 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [13]:
%%timeit arr_m3 = np.copy(arr_m2)
# Abel's implementation (in-place (cheating)).
# The copy on the magic line is %%timeit setup code: it is executed but not
# timed, and the timed call below reuses that same arr_m3 on every loop.
# NOTE(review): presumably the function reorders arr_m3 in place, so only
# the first loop works on unordered data — confirm.
xclim.core.utils._nan_quantile(arr_m3, q)

221 µs ± 2.94 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [14]:
# Notebook-level working copy of arr_m2; the next timing cell passes it to
# `_nan_quantile` directly, with no copy in its setup or timed statement.
arr_m3 = np.copy(arr_m2)

In [15]:
%%timeit
# Abel's implementation (in-place, sorted beforehand (super cheating!)).
# arr_m3 is the notebook-level array, so the very same object is reused by
# every loop of every run and no copy is made anywhere in this cell.
# NOTE(review): "sorted beforehand" presumably holds only once the first call
# has reordered arr_m3 in place — confirm.
xclim.core.utils._nan_quantile(arr_m3, q)

217 µs ± 4.06 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [21]:
import dask
import xarray as xr

# Open the second benchmark dataset from the local data directory.
ds2 = xr.open_dataset(
    "~/data/xclim/tasmax_day_CanESM5_ssp245_r1i1p1f1_gn_20150101-21001231.nc"
)

# Select the first lat/lon grid cell, take its tasmax variable and load it
# into memory.
ds_sel2 = ds2.isel({"lat": 0, "lon": 0})["tasmax"].compute()

In [13]:
# Writable copy of the selected series so NaNs can be injected without
# touching ds_sel2.
arr2 = ds_sel2.values.copy()
# Fraction of samples to overwrite with NaN (a 0.0-1.0 fraction, despite the
# name: it is multiplied directly by the array size below).
percent_of_nans = 0.0
num_of_nans = int(percent_of_nans * arr2.size)
# Draw distinct positions from a seeded generator. Sampling with replacement
# can repeat an index and therefore insert fewer NaNs than requested, and an
# unseeded draw makes the benchmark input differ between runs.
random_indices = np.random.default_rng(0).choice(
    arr2.shape[0], size=num_of_nans, replace=False
)
arr2[random_indices] = np.nan

In [19]:
%%timeit
# Times xclim.core.utils._nan_quantile on the second series (arr2) for the
# quantile levels in q. arr2 is passed directly: no copy is made in the timed
# statement, so the same array object is reused on every loop.
xclim.core.utils._nan_quantile(arr2, q)

430 µs ± 10.4 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [20]:
%%timeit
# Times sdba.nbutils._sortquantile on the second series (arr2) for the
# quantile levels in q. arr2 is passed directly: no copy is made in the timed
# statement, so the same array object is reused on every loop.
sdba.nbutils._sortquantile(arr2, q)

435 µs ± 3.86 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [16]:
# Show the dimensions of the second benchmark series.
arr2.shape

(31390,)

In [17]:
# Show the quantile levels passed to the quantile functions in the timing cells.
q

array([0.        , 0.02040816, 0.04081633, 0.06122449, 0.08163265,
       0.10204082, 0.12244898, 0.14285714, 0.16326531, 0.18367347,
       0.20408163, 0.2244898 , 0.24489796, 0.26530612, 0.28571429,
       0.30612245, 0.32653061, 0.34693878, 0.36734694, 0.3877551 ,
       0.40816327, 0.42857143, 0.44897959, 0.46938776, 0.48979592,
       0.51020408, 0.53061224, 0.55102041, 0.57142857, 0.59183673,
       0.6122449 , 0.63265306, 0.65306122, 0.67346939, 0.69387755,
       0.71428571, 0.73469388, 0.75510204, 0.7755102 , 0.79591837,
       0.81632653, 0.83673469, 0.85714286, 0.87755102, 0.89795918,
       0.91836735, 0.93877551, 0.95918367, 0.97959184, 1.        ])