# Basic dask stuff

In [1]:
import dask

In [3]:
from time import sleep

def inc(x):
    sleep(1)
    return x + 1

def add(x, y):
    sleep(1)
    return x + y

In [4]:
%%time 

x = dask.delayed(inc)(1)

y = dask.delayed(inc)(2)

z = dask.delayed(add)(x, y)

z.compute()

CPU times: user 5.02 ms, sys: 5.19 ms, total: 10.2 ms
Wall time: 2.09 s


5

In [5]:
z.visualize()

RuntimeError: Drawing dask graphs requires the `graphviz` python library and the `graphviz` system library to be installed.

## Dask and for loops

In [9]:
%%time
# serial for loop

def inc(x):
    return x + 1

def double(x):
    
    return x + 2

def add(x, y):
    return x + y

data = [1, 2, 3, 4, 5]

output = []

for x in data:
    a = inc(x)
    b = double(x)
    c = add(a, b)
    output.append(c)
    
total = sum(output)

total

CPU times: user 35 µs, sys: 4 µs, total: 39 µs
Wall time: 43.2 µs


45

In [11]:
%%time
# dask for loop with dask.delayed

def inc(x):
    return x + 1

def double(x):
    
    return x + 2

def add(x, y):
    return x + y

data = [1, 2, 3, 4, 5]

output = []

for x in data:
    a = dask.delayed(inc)(x)
    b = dask.delayed(double)(x)
    c = dask.delayed(add)(a, b)
    output.append(c)
    
total = dask.delayed(sum)(output)

total.compute()

CPU times: user 0 ns, sys: 4.29 ms, total: 4.29 ms
Wall time: 4.82 ms


45

In [13]:
%%time
# dask for loop with decorators

@dask.delayed
def inc(x):
    return x + 1

@dask.delayed
def double(x):
    return x + 2

@dask.delayed
def add(x, y):
    return x + y

data = [1, 2, 3, 4, 5]

output = []

for x in data:
    a = inc(x)
    b = double(x)
    c = add(a, b)
    output.append(c)
    
total = dask.delayed(sum)(output)

total.compute()

CPU times: user 3.14 ms, sys: 0 ns, total: 3.14 ms
Wall time: 3.52 ms


45