In [1]:
import os

os.environ['PREFECT__FLOWS__CHECKPOINTING'] = 'True'

import time

import pandas as pd
import xarray as xr
from prefect import Flow, task

import funnel
from funnel import CacheStore, SQLMetadataStore, config
from funnel.prefect.result import FunnelResult

In [2]:
ds = xr.tutorial.open_dataset('rasm').isel(time=[0, 1])
ds

In [3]:
cache = CacheStore(
    path="/tmp/custom-cache/", readonly=False, on_duplicate_key='skip', storage_options={}
)
metadata_store = SQLMetadataStore(
    cache_store=cache,
    readonly=False,
    serializer='xarray.zarr',
    database_url="sqlite:////tmp/funnel.db",
)

In [4]:
metadata_store

SQLMetadataStore(cache_store=CacheStore(path='/tmp/custom-cache/', readonly=False, on_duplicate_key=<DuplicateKeyEnum.skip: 'skip'>, storage_options={}), readonly=False, serializer_dump_kwargs={}, serializer_load_kwargs={}, database_url='sqlite:////tmp/funnel.db', serializer='xarray.zarr')

In [5]:
metadata_store.put('foo', ds, serializer='xarray.zarr')

In [6]:
metadata_store.df

Unnamed: 0_level_0,serializer,load_kwargs,dump_kwargs,custom_fields,checksum,created_at
key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
foo,xarray.zarr,{},{},{},,2021-11-19 18:46:10.500860


In [7]:
metadata_store

SQLMetadataStore(cache_store=CacheStore(path='/tmp/custom-cache/', readonly=False, on_duplicate_key=<DuplicateKeyEnum.skip: 'skip'>, storage_options={}), readonly=False, serializer_dump_kwargs={}, serializer_load_kwargs={}, database_url='sqlite:////tmp/funnel.db', serializer='xarray.zarr')

In [16]:
r = FunnelResult(
    SQLMetadataStore(
        cache_store=cache,
        readonly=False,
        serializer='xarray.zarr',
        database_url="sqlite:////tmp/funnel.db",
    )
)


@task(target="bar.zarr", result=r)
def compute():
    print(context.to_dict())
    time.sleep(10)
    ds = xr.tutorial.open_dataset('rasm').isel(time=[0, 1])
    return ds


with Flow("xarray") as flow:
    compute()

In [17]:
%%time
flow.run()

[2021-11-19 12:30:17-0700] INFO - prefect.FlowRunner | Beginning Flow run for 'xarray'
[2021-11-19 12:30:17-0700] INFO - prefect.TaskRunner | Task 'compute': Starting task run...
{'config': {'debug': False, 'home_dir': '/Users/abanihi/.prefect', 'backend': 'cloud', 'server': <Box: {'host': 'http://localhost', 'port': 4200, 'host_port': 4200, 'host_ip': '127.0.0.1', 'endpoint': 'http://localhost:4200', 'database': {'host': 'localhost', 'port': 5432, 'host_port': 5432, 'name': 'prefect_server', 'username': 'prefect', 'password': 'test-password', 'connection_url': 'postgresql://prefect:test-password@localhost:5432/prefect_server', 'volume_path': '/Users/abanihi/.prefect/pg_data'}, 'graphql': {'host': '0.0.0.0', 'port': 4201, 'host_port': 4201, 'debug': False, 'path': '/graphql/'}, 'hasura': {'host': 'localhost', 'port': 3000, 'host_port': 3000, 'admin_secret': '', 'claims_namespace': 'hasura-claims', 'graphql_url': 'http://localhost:3000/v1alpha1/graphql', 'ws_url': 'ws://localhost:3000/v

<Success: "All reference tasks succeeded.">

In [10]:
%%time
flow.run()

[2021-11-19 12:22:43-0700] INFO - prefect.FlowRunner | Beginning Flow run for 'xarray'
[2021-11-19 12:22:43-0700] INFO - prefect.TaskRunner | Task 'compute': Starting task run...
[2021-11-19 12:22:43-0700] INFO - prefect.TaskRunner | Task 'compute': Finished task run for task with final state: 'Cached'
[2021-11-19 12:22:43-0700] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
CPU times: user 66.2 ms, sys: 6.16 ms, total: 72.3 ms
Wall time: 70.2 ms


<Success: "All reference tasks succeeded.">

In [11]:
r.metadata_store.df

Unnamed: 0_level_0,serializer,load_kwargs,dump_kwargs,custom_fields,checksum,created_at
key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
foo,xarray.zarr,{},{},{},,2021-11-19 18:46:10.500860
foo.zarr,xarray.zarr,{},{},{},,2021-11-19 19:22:31.049937


In [13]:
from prefect import context

{'config': {'debug': False,
  'home_dir': '/Users/abanihi/.prefect',
  'backend': 'cloud',
  'server': <Box: {'host': 'http://localhost', 'port': 4200, 'host_port': 4200, 'host_ip': '127.0.0.1', 'endpoint': 'http://localhost:4200', 'database': {'host': 'localhost', 'port': 5432, 'host_port': 5432, 'name': 'prefect_server', 'username': 'prefect', 'password': 'test-password', 'connection_url': 'postgresql://prefect:test-password@localhost:5432/prefect_server', 'volume_path': '/Users/abanihi/.prefect/pg_data'}, 'graphql': {'host': '0.0.0.0', 'port': 4201, 'host_port': 4201, 'debug': False, 'path': '/graphql/'}, 'hasura': {'host': 'localhost', 'port': 3000, 'host_port': 3000, 'admin_secret': '', 'claims_namespace': 'hasura-claims', 'graphql_url': 'http://localhost:3000/v1alpha1/graphql', 'ws_url': 'ws://localhost:3000/v1alpha1/graphql', 'execute_retry_seconds': 10}, 'ui': {'host': 'http://localhost', 'port': 8080, 'host_port': 8080, 'host_ip': '127.0.0.1', 'endpoint': 'http://localhost:808