# Dask Arrays - deeper dive

## Scalar operations 

In [1]:
import dask.array as da

In [2]:
# 20 random integers drawn from [0, 10), stored in chunks of 3 elements.
# No seed is set above, so the values differ from run to run.
my_arr = da.random.randint(10, size=20, chunks=3)

In [3]:
my_arr.compute()

array([2, 4, 3, 0, 4, 7, 8, 4, 1, 0, 8, 2, 4, 5, 1, 6, 1, 8, 4, 0])

In [4]:
# The scalar is added to every element; nothing is evaluated until compute().
my_hundred_arr = my_arr + 100
my_hundred_arr.compute()

array([102, 104, 103, 100, 104, 107, 108, 104, 101, 100, 108, 102, 104,
       105, 101, 106, 101, 108, 104, 100])

In [5]:
(my_arr * (-1)).compute()

array([-2, -4, -3,  0, -4, -7, -8, -4, -1,  0, -8, -2, -4, -5, -1, -6, -1,
       -8, -4,  0])

## Reductions

In [6]:
# sum() only builds the task graph: the cell displays a lazy 0-d array
# (shape ()), not the total. Call compute() to get the actual number.
dask_sum = my_arr.sum()
dask_sum

Unnamed: 0,Array,Chunk
Bytes,8 B,8 B
Shape,(),()
Count,17 Tasks,1 Chunks
Type,int64,numpy.ndarray
Array Chunk Bytes 8 B 8 B Shape () () Count 17 Tasks 1 Chunks Type int64 numpy.ndarray,,

Unnamed: 0,Array,Chunk
Bytes,8 B,8 B
Shape,(),()
Count,17 Tasks,1 Chunks
Type,int64,numpy.ndarray


In [7]:
my_arr.compute()

array([2, 4, 3, 0, 4, 7, 8, 4, 1, 0, 8, 2, 4, 5, 1, 6, 1, 8, 4, 0])

In [8]:
dask_sum.compute()

72

In [10]:
# 10x10 array of integer ones; chunks=2 applies to both axes, giving 2x2 blocks.
my_ones_arr = da.ones((10,10), chunks=2, dtype=int)

In [11]:
my_ones_arr.compute()

array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])

In [12]:
# axis=0 averages down the rows, giving one mean per column (a float result).
my_ones_arr.mean(axis=0).compute()

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [13]:
# 4x4 random integers in [0, 10); chunks=(1,4) makes every row its own chunk.
my_custom_array = da.random.randint(10, size=(4,4), chunks=(1,4))

In [14]:
my_custom_array.compute()

array([[9, 5, 6, 0],
       [5, 3, 7, 6],
       [0, 9, 5, 7],
       [6, 7, 7, 9]])

In [15]:
# axis=0: mean of each column.
my_custom_array.mean(axis=0).compute()

array([5.  , 6.  , 6.25, 5.5 ])

In [16]:
# axis=1: mean of each row.
my_custom_array.mean(axis=1).compute()

array([5.  , 5.25, 5.25, 7.25])

## Slicing

In [17]:
# Rows 1-2 and columns 2-3 (end indices are exclusive); the result is still lazy.
my_custom_array[1:3, 2:4]

Unnamed: 0,Array,Chunk
Bytes,32 B,16 B
Shape,"(2, 2)","(1, 2)"
Count,6 Tasks,2 Chunks
Type,int64,numpy.ndarray
"Array Chunk Bytes 32 B 16 B Shape (2, 2) (1, 2) Count 6 Tasks 2 Chunks Type int64 numpy.ndarray",2  2,

Unnamed: 0,Array,Chunk
Bytes,32 B,16 B
Shape,"(2, 2)","(1, 2)"
Count,6 Tasks,2 Chunks
Type,int64,numpy.ndarray


In [18]:
my_custom_array[1:3, 2:4].compute()

array([[7, 6],
       [5, 7]])

As with NumPy slicing, we pass a slice instead of a single index, like this: `[start:end]` (`start` is included, `end` is excluded).  
Example with NumPy:
```python
arr = np.array([1, 2, 3, 4, 5, 6, 7])
print(arr[1:5])
```
=> [2 3 4 5]

```python
arr = np.array([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]])
print(arr[1, 1:4])
```
=> [7 8 9]  
=> From the second row (index 1), take the elements from index 1 up to index 4 (not included).

Note: Remember that indexing starts at 0, so the second element has index 1.

So in our Dask example, `my_custom_array[1:3, 2:4]`:
- `1:3` (first axis) selects indices 1 and 2 (index 3 is not included), i.e. the 2nd and 3rd rows
- `2:4` (second axis) selects indices 2 and 3 (index 4 is not included), i.e. the 3rd and 4th columns


## Broadcasting

In [19]:
my_custom_array.compute()

array([[9, 5, 6, 0],
       [5, 3, 7, 6],
       [0, 9, 5, 7],
       [6, 7, 7, 9]])

In [20]:
# 1-D array of four ones; no dtype is given, so the values are floats (1.).
my_small_arr = da.ones(4, chunks=2)
my_small_arr.compute()

array([1., 1., 1., 1.])

In [21]:
# Shapes (4, 4) and (4,) are compatible: the 1-D array is broadcast across every row.
brd_example1 = da.add(my_custom_array, my_small_arr)

In [22]:
# my_small_arr is repeated for every row, so the addition behaves like:
# [[9, 5, 6, 0], + [[1, 1, 1, 1]
#  [5, 3, 7, 6],   [1, 1, 1, 1]
#  [0, 9, 5, 7],   [1, 1, 1, 1]
#  [6, 7, 7, 9]]   [1, 1, 1, 1]]
# The result is float because my_small_arr holds floats.
brd_example1.compute()

array([[10.,  6.,  7.,  1.],
       [ 6.,  4.,  8.,  7.],
       [ 1., 10.,  6.,  8.],
       [ 7.,  8.,  8., 10.]])

In [23]:
# Same shape and dtype as my_small_arr, with every element set to 10.
ten_arr = da.full_like(my_small_arr, 10)

In [24]:
ten_arr.compute()

array([10., 10., 10., 10.])

In [25]:
brd_example2 = da.add(my_custom_array, ten_arr)

In [26]:
brd_example2.compute()

array([[19., 15., 16., 10.],
       [15., 13., 17., 16.],
       [10., 19., 15., 17.],
       [16., 17., 17., 19.]])

## Reshaping

In [27]:
my_custom_array.shape

(4, 4)

In [28]:
# Flatten the (4, 4) array into a 1-D array of 16 elements, row by row.
custom_arr_1d = my_custom_array.reshape(16)

In [29]:
custom_arr_1d

Unnamed: 0,Array,Chunk
Bytes,128 B,32 B
Shape,"(16,)","(4,)"
Count,8 Tasks,4 Chunks
Type,int64,numpy.ndarray
"Array Chunk Bytes 128 B 32 B Shape (16,) (4,) Count 8 Tasks 4 Chunks Type int64 numpy.ndarray",16  1,

Unnamed: 0,Array,Chunk
Bytes,128 B,32 B
Shape,"(16,)","(4,)"
Count,8 Tasks,4 Chunks
Type,int64,numpy.ndarray


In [30]:
custom_arr_1d.compute()

array([9, 5, 6, 0, 5, 3, 7, 6, 0, 9, 5, 7, 6, 7, 7, 9])

## Stacking

In [31]:
# stack() joins the arrays along a NEW axis (axis=0 by default): shape (2, 4, 4).
stacked_arr = da.stack([brd_example1, brd_example2])

In [32]:
stacked_arr.compute()

array([[[10.,  6.,  7.,  1.],
        [ 6.,  4.,  8.,  7.],
        [ 1., 10.,  6.,  8.],
        [ 7.,  8.,  8., 10.]],

       [[19., 15., 16., 10.],
        [15., 13., 17., 16.],
        [10., 19., 15., 17.],
        [16., 17., 17., 19.]]])

In [33]:
# axis=1 inserts the new axis second, pairing up matching rows: shape (4, 2, 4).
another_stacked = da.stack([brd_example1, brd_example2], axis=1)

In [34]:
another_stacked.compute()

array([[[10.,  6.,  7.,  1.],
        [19., 15., 16., 10.]],

       [[ 6.,  4.,  8.,  7.],
        [15., 13., 17., 16.]],

       [[ 1., 10.,  6.,  8.],
        [10., 19., 15., 17.]],

       [[ 7.,  8.,  8., 10.],
        [16., 17., 17., 19.]]])

## Concatenate

In [35]:
# concatenate() joins along an EXISTING axis (axis=0 by default):
# the rows of the second array are appended below the first, shape (8, 4).
concate_arr = da.concatenate([brd_example1, brd_example2])

In [36]:
concate_arr.compute()

array([[10.,  6.,  7.,  1.],
       [ 6.,  4.,  8.,  7.],
       [ 1., 10.,  6.,  8.],
       [ 7.,  8.,  8., 10.],
       [19., 15., 16., 10.],
       [15., 13., 17., 16.],
       [10., 19., 15., 17.],
       [16., 17., 17., 19.]])

In [37]:
# axis=1 joins the arrays side by side: columns are appended, shape (4, 8).
another_concate_arr = da.concatenate([brd_example1, brd_example2],axis=1)

In [38]:
another_concate_arr.compute()

array([[10.,  6.,  7.,  1., 19., 15., 16., 10.],
       [ 6.,  4.,  8.,  7., 15., 13., 17., 16.],
       [ 1., 10.,  6.,  8., 10., 19., 15., 17.],
       [ 7.,  8.,  8., 10., 16., 17., 17., 19.]])