### Import Libraries

In [4]:
import numpy as np
from dask import delayed
from dask.distributed import Client
import time

### Initiate a Dask Client

In [None]:
# Create the Dask client used by the rest of the notebook. Called with no
# arguments, as here, it is backed by a local cluster (the client summary
# output further down reports `distributed.LocalCluster`).
client = Client()

### Display the client object to get the dashboard address
- Once you have the address (e.g., http://127.0.0.1:8787/status), click on the Dask icon on the left-hand side of the JupyterLab interface.
- Copy and paste the address into the field at the top, then press Enter.
- Select which displays you would like to access. A few good ones to start with include:
    - Graph
    - Memory Use
    - Processing Tasks
    - Profile
    - Profile Server
    - Progress
    - Task Stream
    - Workers

A default layout can also be specified by going to the File menu and choosing "Launch Dask Dashboard Layout". See more here: https://github.com/dask/dask-labextension?tab=readme-ov-file#configuring-a-default-layout

In [None]:
# A bare `client` as the cell's last expression renders its summary, which
# includes the dashboard address to paste into the Dask panel.
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 4
Total threads: 16,Total memory: 48.00 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:54154,Workers: 0
Dashboard: http://127.0.0.1:8787/status,Total threads: 0
Started: Just now,Total memory: 0 B

0,1
Comm: tcp://127.0.0.1:54166,Total threads: 4
Dashboard: http://127.0.0.1:54174/status,Memory: 12.00 GiB
Nanny: tcp://127.0.0.1:54157,
Local directory: /var/folders/8v/nddcvmn12wl6rr_v0cwvvjvr0000gr/T/dask-scratch-space/worker-vru4i7j9,Local directory: /var/folders/8v/nddcvmn12wl6rr_v0cwvvjvr0000gr/T/dask-scratch-space/worker-vru4i7j9

0,1
Comm: tcp://127.0.0.1:54169,Total threads: 4
Dashboard: http://127.0.0.1:54171/status,Memory: 12.00 GiB
Nanny: tcp://127.0.0.1:54159,
Local directory: /var/folders/8v/nddcvmn12wl6rr_v0cwvvjvr0000gr/T/dask-scratch-space/worker-zy_wchc1,Local directory: /var/folders/8v/nddcvmn12wl6rr_v0cwvvjvr0000gr/T/dask-scratch-space/worker-zy_wchc1

0,1
Comm: tcp://127.0.0.1:54168,Total threads: 4
Dashboard: http://127.0.0.1:54175/status,Memory: 12.00 GiB
Nanny: tcp://127.0.0.1:54161,
Local directory: /var/folders/8v/nddcvmn12wl6rr_v0cwvvjvr0000gr/T/dask-scratch-space/worker-n6mf61xx,Local directory: /var/folders/8v/nddcvmn12wl6rr_v0cwvvjvr0000gr/T/dask-scratch-space/worker-n6mf61xx

0,1
Comm: tcp://127.0.0.1:54167,Total threads: 4
Dashboard: http://127.0.0.1:54170/status,Memory: 12.00 GiB
Nanny: tcp://127.0.0.1:54163,
Local directory: /var/folders/8v/nddcvmn12wl6rr_v0cwvvjvr0000gr/T/dask-scratch-space/worker-t6wnfh76,Local directory: /var/folders/8v/nddcvmn12wl6rr_v0cwvvjvr0000gr/T/dask-scratch-space/worker-t6wnfh76


### Standard Python Execution

In [9]:
# Baseline: do all the work eagerly with NumPy in this process.
#
# Start the clock. time.perf_counter() is a monotonic, high-resolution clock
# meant for measuring elapsed time; time.time() is wall-clock time and can
# jump if the system clock is adjusted mid-run.
start_time = time.perf_counter()

# Two large arrays (e.g. parts of a dataset).
# Array creation sits inside the timed region, so the reported duration
# includes generating the random data as well as summing it.
A, B = np.random.random((10000, 10000)), np.random.random((10000, 10000))

# Sum each array.
sumA, sumB = np.sum(A), np.sum(B)

# Add the two sums. Both are NumPy scalars, so plain `+` gives the same
# value as np.sum over the pair without building an intermediate array.
total = sumA + sumB

execution_duration = time.perf_counter() - start_time

print(f"The result {total} took {execution_duration} seconds to run")

The result 100001446.05345993 took 0.5374143123626709 seconds to run


### Dask Execution

In [11]:
# Same computation as the NumPy cell, expressed as a Dask task graph.
#
# Start the clock. time.perf_counter() is a monotonic, high-resolution clock
# meant for measuring elapsed time; time.time() is wall-clock time and can
# jump if the system clock is adjusted mid-run.
start_time = time.perf_counter()

# Two large arrays (e.g. parts of a dataset).
# NOTE(review): A and B are built eagerly in this process and then passed by
# value into the delayed calls below. With a process-based cluster they
# presumably have to be serialized and shipped to the workers, which is likely
# why this cell can run slower than the plain NumPy version. Consider creating
# the arrays inside delayed tasks instead -- TODO confirm against the dashboard.
A, B = np.random.random((10000, 10000)), np.random.random((10000, 10000))

# Create delayed tasks for summing each array; nothing is computed yet.
sumA, sumB = delayed(np.sum)(A), delayed(np.sum)(B)

# Create another task to add the two sums. Applying `+` to Delayed objects
# records a new lazy task rather than performing the addition now.
total = sumA + sumB

# Execute the task graph and bring the concrete value back to this process.
result = total.compute()

execution_duration = time.perf_counter() - start_time
print(f"The result {result} took {execution_duration} seconds to run")

The result 99996104.51270065 took 1.399691104888916 seconds to run


In [12]:
# Demo finished: shut the client down. Per the Dask documentation this also
# stops the scheduler and workers the client is connected to.
client.shutdown()