In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import xarray as xr

from scipy.sparse import coo_matrix
import dask.array as da
from dask.diagnostics import ProgressBar

**Warning: This is not working yet! I am looking for advice.**

# Preparation

In [2]:
# Load the weights file; the matrix is stored as (row, col, S) triplets.
ds = xr.open_dataset("weights.nc")
# `sizes` is the name -> length mapping. Using `Dataset.dims` as a mapping is
# deprecated in recent xarray releases.
n_s = ds.sizes['n_s']
# Shift the stored indices down by one before using them as array indices
# (presumably they are 1-based in the file -- confirm).
col = ds['col'].values - 1
row = ds['row'].values - 1
S = ds['S'].values
# NOTE(review): no `shape=` is passed, so scipy infers the matrix shape from
# the largest row/col index present. Pass the true grid sizes explicitly if
# trailing cells can be missing from the weights.
A = coo_matrix((S, (row, col)))

In [3]:
# Random test field with one column per column of A. A seeded generator is
# used so the values are identical after every kernel restart, without
# touching NumPy's global random state.
data = np.random.RandomState(0).rand(500, A.shape[1])
data.shape

(500, 240000)

In [4]:
# Baseline: apply the sparse matrix with scipy directly. `data` is transposed
# going in and the product is transposed coming out, so the leading axis of
# `data` stays first in the result.
%time out_scipy = A.dot(data.T).T
out_scipy.shape

CPU times: user 392 ms, sys: 144 ms, total: 536 ms
Wall time: 535 ms


(500, 120000)

# apply_ufunc on numpy array

In [5]:
def apply_A(data):
    """Apply the module-level sparse matrix ``A`` to ``data``.

    ``data`` is transposed before the product and the product is transposed
    back, so a 2-D input of shape ``(n, A.shape[1])`` yields a result of shape
    ``(n, A.shape[0])``.

    ``A`` is deliberately read from the enclosing (global) namespace instead of
    being passed in, so the function takes a single array argument.
    """
    transposed_in = data.T
    product = A.dot(transposed_in)
    return product.T

Calling `apply_A` through `xr.apply_ufunc` on a plain NumPy array has the same performance as calling scipy directly.

In [6]:
# Same function routed through xarray. The input is a bare NumPy array, so no
# dimension names or coordinates are involved.
%time xr.apply_ufunc(apply_A, data)

CPU times: user 395 ms, sys: 143 ms, total: 538 ms
Wall time: 538 ms


array([[ 0.70758235,  0.65166196,  0.37301186, ...,  0.47573623,
         0.79292462,  0.73709179],
       [ 0.4111927 ,  0.70517824,  0.55168382, ...,  0.17803722,
         0.67770673,  0.64719172],
       [ 0.26985564,  0.54065578,  0.44570048, ...,  0.3291685 ,
         0.49389298,  0.22829723],
       ..., 
       [ 0.77835188,  0.37966545,  0.75609087, ...,  0.30147231,
         0.48863662,  0.56009023],
       [ 0.11881346,  0.28899334,  0.28665715, ...,  0.08285116,
         0.74617251,  0.30869556],
       [ 0.67568354,  0.50891094,  0.22110719, ...,  0.42110356,
         0.4074391 ,  0.62864727]])

# apply_ufunc on xarray DataArray

In [7]:
# Label the two axes. The index coordinates are sized from `data` itself
# instead of repeating the literal 500 / 240000, so the cell keeps working if
# the weights file (and therefore the grid size) changes.
dr = xr.DataArray(data,
                  dims=['extra_dims', 'grid_dims'],
                  coords=[np.arange(n) for n in data.shape],
                  name='data'
                 )
dr

<xarray.DataArray 'data' (extra_dims: 500, grid_dims: 240000)>
array([[ 0.660312,  0.84742 ,  0.656528, ...,  0.897226,  0.972557,  0.79919 ],
       [ 0.267947,  0.811986,  0.673211, ...,  0.798675,  0.609709,  0.71872 ],
       [ 0.169884,  0.358929,  0.553505, ...,  0.593384,  0.373854,  0.016039],
       ..., 
       [ 0.850872,  0.833998,  0.208549, ...,  0.395105,  0.81532 ,  0.543201],
       [ 0.02558 ,  0.326246,  0.172092, ...,  0.733185,  0.810342,  0.104598],
       [ 0.867441,  0.418636,  0.500482, ...,  0.557743,  0.008848,  0.902175]])
Coordinates:
  * extra_dims  (extra_dims) int64 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 ...
  * grid_dims   (grid_dims) int64 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 ...

Applying the function to a NumPy-backed `DataArray` has the same performance as before.

In [8]:
%time dr_out = xr.apply_ufunc(apply_A, dr)
dr_out

CPU times: user 398 ms, sys: 147 ms, total: 544 ms
Wall time: 543 ms


<xarray.DataArray 'data' (extra_dims: 500, grid_dims: 120000)>
array([[ 0.707582,  0.651662,  0.373012, ...,  0.475736,  0.792925,  0.737092],
       [ 0.411193,  0.705178,  0.551684, ...,  0.178037,  0.677707,  0.647192],
       [ 0.269856,  0.540656,  0.4457  , ...,  0.329168,  0.493893,  0.228297],
       ..., 
       [ 0.778352,  0.379665,  0.756091, ...,  0.301472,  0.488637,  0.56009 ],
       [ 0.118813,  0.288993,  0.286657, ...,  0.082851,  0.746173,  0.308696],
       [ 0.675684,  0.508911,  0.221107, ...,  0.421104,  0.407439,  0.628647]])
Coordinates:
  * extra_dims  (extra_dims) int64 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 ...
  * grid_dims   (grid_dims) int64 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 ...

In [9]:
# The xarray result should be element-for-element identical to the scipy one.
np.array_equal(dr_out.data, out_scipy)

True

# Use dask array

In [10]:
# Chunk along the first axis only. Every chunk spans the full second axis,
# whose length is taken from `data` rather than hard-coding 240000.
data_dask = da.from_array(data, chunks=(100, data.shape[1]))
data_dask

dask.array<array, shape=(500, 240000), dtype=float64, chunksize=(100, 240000)>

<font color=red> Horribly slow compared with the NumPy case (about 4.7 s instead of 0.5 s)... </font>

In [11]:
# Hand the dask array straight to the scipy-based function (no xarray involved).
# NOTE(review): scipy presumably has to coerce the dask array to NumPy
# internally -- confirm whether that is what makes this path slow.
%time out_dask = apply_A(data_dask)
out_dask.shape

CPU times: user 3.86 s, sys: 872 ms, total: 4.73 s
Wall time: 4.75 s


(500, 120000)

In [12]:
# The dask-input result should be element-for-element identical to the scipy one.
np.array_equal(out_dask, out_scipy)

True

# apply_ufunc on dask array

In [13]:
# Wrap the chunked array with dimension names only; no coordinates are attached.
dr_dask = xr.DataArray(
    data_dask,
    name='data',
    dims=['extra_dims', 'grid_dims'],
)
dr_dask

<xarray.DataArray 'data' (extra_dims: 500, grid_dims: 240000)>
dask.array<shape=(500, 240000), dtype=float64, chunksize=(100, 240000)>
Dimensions without coordinates: extra_dims, grid_dims

<font color=red> As slow as the previous case... </font>

In [14]:
%time dr_out_dask = xr.apply_ufunc(apply_A, dr_dask, dask='allowed')

CPU times: user 3.81 s, sys: 939 ms, total: 4.75 s
Wall time: 4.81 s


In [15]:
# The dask='allowed' result should be element-for-element identical to the scipy one.
np.array_equal(dr_out_dask, out_scipy)

True

# apply_ufunc on dask array, parallelized

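<font color=red> The shape is wrong! </font> `grid_dims` should be 120000 (output), not 240000 (input). No core dimensions are declared in the call below, so `apply_ufunc` assumes the function leaves every dimension's length unchanged.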

In [16]:
%%time 
dr_out_1 = xr.apply_ufunc(apply_A, dr_dask,
                              dask='parallelized', output_dtypes=[float])
dr_out_1

CPU times: user 141 ms, sys: 47.5 ms, total: 189 ms
Wall time: 188 ms


In [17]:
# Fails at compute time: the blocks actually produced by `apply_A` do not have
# the shape that the lazy result was declared with.
dr_out_1.compute()

ValueError: replacement data must match the Variable's shape

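Specifying `input_core_dims` alone doesn't fix the problem... Now the grid dimension is totally missing: a core dimension that is listed only on the input side is treated as consumed by the function, so the new grid axis also has to be declared through `output_core_dims` (together with its size, since the result is lazy).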

In [18]:
%%time 
dr_out_2 = xr.apply_ufunc(apply_A, dr_dask, 
                          input_core_dims=[['grid_dims']],
                          dask='parallelized', output_dtypes=[float])
dr_out_2

CPU times: user 104 ms, sys: 20.7 ms, total: 125 ms
Wall time: 124 ms


In [19]:
# Materialise the lazy result. When the call that built `dr_out_2` declares no
# output core dimension, this raises: each block comes back 2-D, but the
# result was declared without the grid axis.
dr_out_2.compute()

ValueError: could not broadcast input array from shape (100,120000) into shape (100)