# Pylops-distributed - ZARR file

In this notebook we will learn how to save a numpy array in a zarr file for distributed computation.

We will use the reflection response for Marchenko redatuming as sample data.

In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import shutil
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import matplotlib.pyplot as plt
import scipy as sp
import dask.array as da
import pylops
import pylops_distributed
import zarr

from pylops.utils import dottest

Local cluster

In [2]:
# Start a local dask cluster through the pylops_distributed helper:
# 2 workers with 2 threads each, without separate worker processes.
local_cluster_options = dict(n_workers=2, threads_per_worker=2, processes=False)
client = pylops_distributed.utils.backend.dask(**local_cluster_options)
client

0,1
Client  Scheduler: inproc://10.224.36.70/13311/1  Dashboard: http://localhost:8787/status,Cluster  Workers: 2  Cores: 4  Memory: 135.29 GB


SSH cluster (alternative to the local cluster: this cell rebinds `client`, so run only the setup you need)

In [3]:
from dask.distributed import Client, Scheduler

# Connect to the dask scheduler listening at the given host:port.
# This rebinds `client`, replacing any client created earlier.
scheduler_address = 'be-linrgsn045:8786'
client = Client(scheduler_address)

In [4]:
# Show the client summary (scheduler address, dashboard link, workers).
client

0,1
Client  Scheduler: tcp://be-linrgsn045:8786  Dashboard: http://be-linrgsn045:8787/status,Cluster  Workers: 4  Cores: 60  Memory: 541.16 GB


## File transfer

In [5]:
# Load the array stored under key 'R' from the .npz archive.
# np.load returns an NpzFile that keeps the archive open, so use it as a
# context manager to release the file handle once R has been read.
with np.load('../data/marchenko/input.npz') as npz:
    R = npz['R']

# R is 3-dimensional; the names suggest (sources, receivers, time samples)
# -- TODO confirm against the data description.
ns, nr, nt = R.shape
print(R.shape)

(101, 101, 800)


In [12]:
# Start from a clean store: remove any zarr directory left by a previous run.
# On the very first run the directory does not exist yet, which is fine;
# any other failure (e.g. permissions) is still raised.
zarr_path = '../data/marchenko/input.zarr'
try:
    shutil.rmtree(zarr_path)
except FileNotFoundError:
    pass

# Create the on-disk directory store and its root group.
Rstore = zarr.DirectoryStore(zarr_path)
Rroot = zarr.group(Rstore)

In [13]:
# Allocate a zero-filled float64 array named 'R' in the root group, laid out
# as (nt, ns, nr) and chunked in blocks of nt // 4 along the first axis.
zarr_shape = (nt, ns, nr)
zarr_chunks = (nt // 4, ns, nr)
Rzaar = Rroot.zeros('R', shape=zarr_shape, chunks=zarr_chunks, dtype=np.float64)
Rzaar

<zarr.core.Array '/R' (800, 101, 101) float64>

In [14]:
# Inspect the array metadata (shape, chunk shape, compressor, store type).
Rzaar.info

0,1
Name,/R
Type,zarr.core.Array
Data type,float64
Shape,"(800, 101, 101)"
Chunk shape,"(200, 101, 101)"
Order,C
Read-only,False
Compressor,"Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)"
Store type,zarr.storage.DirectoryStore
No. bytes,65286400 (62.3M)


Let's write the data into the zarr array, moving the last axis of `R` to the front to match the array's shape

In [15]:
# Reorder R from (ns, nr, nt) to (nt, ns, nr) so it matches the shape of
# Rzaar, then write the whole array into the store.
Rzaar[:, :, :] = np.transpose(R, axes=(2, 0, 1))

Now we read the zarr array back as a dask array

In [16]:
# Open the 'R' array inside the zarr directory as a dask array.
zarr_array_url = '../data/marchenko/input.zarr/R'
Rda = da.from_zarr(zarr_array_url)
Rda

Unnamed: 0,Array,Chunk
Bytes,65.29 MB,16.32 MB
Shape,"(800, 101, 101)","(200, 101, 101)"
Count,5 Tasks,4 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 65.29 MB 16.32 MB Shape (800, 101, 101) (200, 101, 101) Count 5 Tasks 4 Chunks Type float64 numpy.ndarray",101  101  800,

Unnamed: 0,Array,Chunk
Bytes,65.29 MB,16.32 MB
Shape,"(800, 101, 101)","(200, 101, 101)"
Count,5 Tasks,4 Chunks
Type,float64,numpy.ndarray


In [20]:
# All-ones dask array of shape (nt, nr, 1), chunked in blocks of nt // 4
# along the first axis.
x_shape = (nt, nr, 1)
x_chunks = (nt // 4, nr, 1)
xda = da.ones(x_shape, chunks=x_chunks)
xda

Unnamed: 0,Array,Chunk
Bytes,646.40 kB,161.60 kB
Shape,"(800, 101, 1)","(200, 101, 1)"
Count,4 Tasks,4 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 646.40 kB 161.60 kB Shape (800, 101, 1) (200, 101, 1) Count 4 Tasks 4 Chunks Type float64 numpy.ndarray",1  101  800,

Unnamed: 0,Array,Chunk
Bytes,646.40 kB,161.60 kB
Shape,"(800, 101, 1)","(200, 101, 1)"
Count,4 Tasks,4 Chunks
Type,float64,numpy.ndarray


In [22]:
# Matrix product of Rda and xda over their last two axes, batched along the
# leading axis; the graph is built first and evaluated by compute().
yda = Rda @ xda
yda.compute()

array([[[-1.29722250e-02],
        [-1.48214355e-02],
        [-1.66006250e-02],
        ...,
        [-1.66006050e-02],
        [-1.48217915e-02],
        [-1.29719550e-02]],

       [[-1.34834872e-02],
        [-1.53386862e-02],
        [-1.71435312e-02],
        ...,
        [-1.71451092e-02],
        [-1.53392422e-02],
        [-1.34834392e-02]],

       [[-1.41127174e-02],
        [-1.60446894e-02],
        [-1.78595634e-02],
        ...,
        [-1.78603414e-02],
        [-1.60448614e-02],
        [-1.41156814e-02]],

       ...,

       [[ 1.55003680e-03],
        [ 1.42788280e-03],
        [ 1.56028080e-03],
        ...,
        [ 1.15279940e-03],
        [ 5.59381900e-04],
        [ 9.99814000e-04]],

       [[ 3.74552600e-04],
        [ 1.18967600e-03],
        [ 1.83778560e-03],
        ...,
        [ 1.49492206e-03],
        [ 1.29627180e-03],
        [ 1.49053780e-03]],

       [[-8.13596000e-04],
        [-1.25375600e-05],
        [ 6.31703600e-04],
        ...,
        