# Client Graph
Notebook to manipulate a dask graph at the client stage.

## Example 1: Edit submit method

In [None]:
from dask.distributed import Client
import types

# Create a Dask client
client = Client()

original_submit = client.submit

In [None]:
def modified_submit(*args, **kwargs):
    print("Entered Submit")
    print("Party if it works")
    return original_submit(*args, **kwargs)

In [None]:
client.submit = modified_submit

In [None]:
def neg(x):
    return -x


client.submit(neg, 3)

## Example 2: Edit collections_to_dsk method

In [1]:
from dask.distributed import Client
import dask
import dask.array as da

from datetime import datetime

client = Client()

original_collections_to_dsk = client.collections_to_dsk

In [14]:
def myFunc(collections, *args, **kwargs):
    c = datetime.now()
    print("Inside myFunc. Time = " + c.strftime("%H:%M:%S"))
    print("Dask Graph Tasks")
    for collection in collections:
        print(f"Task Name: {collection.name}")
    print("-----------------\n")

    return original_collections_to_dsk(collections, *args, **kwargs)


client.collections_to_dsk = myFunc
print("collections_to_dsk method has been replaced")

collections_to_dsk method has been replaced


In [8]:
x = da.ones((1000, 1000), chunks=(100, 100))
y = x + 1
z = y.mean()

In [15]:
result = client.compute(z)

Inside myFunc. Time = 19:47:32
Dask Graph Tasks
Task Name: mean_agg-aggregate-c50263a0ad47c330577b3058195ad928
-----------------



# Example 3: Do some basic caching

In [2]:
from dask.distributed import Client
import dask
import dask.array as da

from datetime import datetime

client = Client()

original_collections_to_dsk = client.collections_to_dsk

In [44]:
existing_functions = []

def myFunc(collections, *args, **kwargs):
    c = datetime.now()
    print("Inside myFunc. Time = " + c.strftime("%H:%M:%S"))

    print("Functions already processed")
    global existing_functions
    for funcs in existing_functions:
        print(f"{funcs}")

    print("Dask Graph Tasks")
    for collection in collections:
        print(f"Task Name: {collection.name}")
        print(collection.dask)
        # print all collection attributes
        #for key in dir(collection):
        #    print(f"{key}: {getattr(collection,key)}")
        #    try:
        #        print(f"{key}: {getattr(collection,key)()}")
        #    except:
        #        continue
        #print(dir(collection))
        if collection.name not in existing_functions:
            print(f"Adding {collection.name} to existing_functions")
            existing_functions.append(collection.name)
        else:
            print(f"{collection.name} has already been processed")
            # override dask highlevel graph to load a file alled collections.name
            # collection.dask...


    print("-----------------")

    return original_collections_to_dsk(collections, *args, **kwargs)


client.collections_to_dsk = myFunc
print("collections_to_dsk method has been replaced")

collections_to_dsk method has been replaced


In [45]:
x = da.ones((1000, 1000), chunks=(100, 100))
y = x + 1
z = y.mean()

In [46]:
result = client.compute(z)

Inside myFunc. Time = 21:29:14
Functions already processed
Dask Graph Tasks
Task Name: mean_agg-aggregate-c50263a0ad47c330577b3058195ad928
HighLevelGraph with 7 layers.
<dask.highlevelgraph.HighLevelGraph object at 0x7fe12d649960>
 0. ones_like-c53a97836143a00162e3470fef213e1e
 1. add-098526cf93d1ac0d3a8744f59d0814fb
 2. mean_chunk-4e5168d025885dfbdfd76df3f8083cda
 3. mean_combine-partial-0832826c732652f3243668d740a0d5f3
 4. mean_combine-partial-f33215f12dcb169d48ded3405ae5b4a2
 5. mean_combine-partial-f380b3499d120118c907e62d760350cc
 6. mean_agg-aggregate-c50263a0ad47c330577b3058195ad928

Adding mean_agg-aggregate-c50263a0ad47c330577b3058195ad928 to existing_functions
-----------------


In [47]:
result

In [48]:
result.result()

2.0

In [49]:
result = client.compute(z)

Inside myFunc. Time = 21:29:26
Functions already processed
mean_agg-aggregate-c50263a0ad47c330577b3058195ad928
Dask Graph Tasks
Task Name: mean_agg-aggregate-c50263a0ad47c330577b3058195ad928
HighLevelGraph with 7 layers.
<dask.highlevelgraph.HighLevelGraph object at 0x7fe12d649960>
 0. ones_like-c53a97836143a00162e3470fef213e1e
 1. add-098526cf93d1ac0d3a8744f59d0814fb
 2. mean_chunk-4e5168d025885dfbdfd76df3f8083cda
 3. mean_combine-partial-0832826c732652f3243668d740a0d5f3
 4. mean_combine-partial-f33215f12dcb169d48ded3405ae5b4a2
 5. mean_combine-partial-f380b3499d120118c907e62d760350cc
 6. mean_agg-aggregate-c50263a0ad47c330577b3058195ad928

mean_agg-aggregate-c50263a0ad47c330577b3058195ad928 has already been processed


AttributeError: 'list' object has no attribute 'name'

In [24]:
result

In [23]:
result.result()

2.0

In [14]:
dir(result)

['__await__',
 '__class__',
 '__dask_tokenize__',
 '__del__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_bind_late',
 '_cb_executor',
 '_cb_executor_pid',
 '_cleared',
 '_client',
 '_counter',
 '_exception',
 '_generation',
 '_id',
 '_inform',
 '_input_state',
 '_repr_html_',
 '_result',
 '_state',
 '_traceback',
 '_uid',
 '_verify_initialized',
 'add_done_callback',
 'cancel',
 'cancelled',
 'client',
 'done',
 'exception',
 'executor',
 'key',
 'make_future',
 'release',
 'result',
 'retry',
 'status',
 'traceback',
 'type']