In [None]:
SYFT_VERSION = ">=0.9,<1.0.0"
package_string = f'"syft{SYFT_VERSION}"'

In [None]:
# stdlib
import os

# third party
import kr8s
import numpy as np
import requests

# syft absolute
import syft as sy

from getpass import getpass  # noqa


sy.requires(SYFT_VERSION)

# syft absolute
from syft.service.worker.image_registry import SyftImageRegistry
from syft.service.worker.worker_image import SyftWorkerImage

In [None]:
def get_kr8s_client():
    """Return a kr8s API client created for the ``syft`` namespace."""
    syft_namespace_client = kr8s.api(namespace="syft")
    return syft_namespace_client

In [None]:
def get_statefulset_by_pool_name(pool_name, kr8s_client=None):
    """Return the first StatefulSet labelled with the given worker pool name.

    Args:
        pool_name: Value matched against the
            ``app.kubernetes.io/component`` label.
        kr8s_client: Optional client exposing
            ``get(kind, label_selector=...)``. When omitted, the client
            returned by ``get_kr8s_client()`` is used.

    Returns:
        The first matching object, or ``None`` when nothing matches.
    """
    if kr8s_client is None:
        kr8s_client = get_kr8s_client()
    matches = kr8s_client.get(
        "statefulsets", label_selector={"app.kubernetes.io/component": pool_name}
    )
    # `next(iter(...))` accepts both lists and lazy iterators, whereas
    # `len()` / `[0]` only work on sized, indexable sequences.
    return next(iter(matches), None)


def is_subset_dict(subset, superset):
    """Return True when every key/value pair of ``subset`` is in ``superset``.

    An empty ``subset`` is trivially contained in any ``superset``.
    """
    for pair in subset.items():
        if pair not in superset.items():
            return False
    return True

In [None]:
os.environ["ORCHESTRA_DEPLOYMENT_TYPE"] = "remote"
os.environ["DEV_MODE"] = "True"

# Uncomment this to add custom values
# os.environ["SERVER_URL"] = "http://localhost"
# os.environ["SERVER_PORT"] = "8080"

In [None]:
datasite = sy.orchestra.launch(
    name="test-datasite-1",
    dev_mode=True,
)

In [None]:
datasite_client = datasite.login(email="info@openmined.org", password="changethis")
datasite_client

### Scaling Default Worker Pool

We should see a default worker pool

In [None]:
datasite_client.worker_pools

Scale up to 3 workers

In [None]:
datasite_client.api.services.worker_pool.scale(number=3, pool_name="default-pool")

In [None]:
result = datasite_client.api.services.worker_pool.get_by_name(pool_name="default-pool")
assert len(result.workers) == 3, str(result.to_dict())
result

In [None]:
# stdlib
# wait for some time for scale up to be ready
from time import sleep

sleep(5)

Scale down to 1 worker

In [None]:
datasite_client.api.services.worker_pool.scale(number=1, pool_name="default-pool")

In [None]:
result = datasite_client.api.services.worker_pool.get_by_name(pool_name="default-pool")
assert len(result.workers) == 1, str(result.to_dict())
result

In [None]:
default_worker_pool = datasite_client.api.services.worker_pool.get_by_name(
    pool_name="default-pool"
)
default_worker_pool

#### Submit Dockerfile

In [None]:
# syft absolute
from syft.util.util import get_latest_tag

# Registry of the base backend image; defaults to "docker.io" when the
# SYFT_BASE_IMAGE_REGISTRY environment variable is not set.
registry = os.getenv("SYFT_BASE_IMAGE_REGISTRY", "docker.io")
repo = "openmined/syft-backend"

# For a registry whose name contains "k3d", look the tag up with
# get_latest_tag; otherwise use the installed syft version as the tag.
if "k3d" in registry:
    tag = get_latest_tag(registry, repo)
else:
    tag = sy.__version__

In [None]:
custom_dockerfile_str = f"""
FROM {registry}/{repo}:{tag}

RUN uv pip install pydicom

""".strip()

In [None]:
docker_config = sy.DockerWorkerConfig(dockerfile=custom_dockerfile_str)

In [None]:
assert docker_config.dockerfile == custom_dockerfile_str

In [None]:
submit_result = datasite_client.api.services.worker_image.submit(
    worker_config=docker_config
)
submit_result

In [None]:
assert isinstance(submit_result, sy.SyftSuccess), str(submit_result)

In [None]:
dockerfile_list = datasite_client.images.get_all()
dockerfile_list

In [None]:
assert len(dockerfile_list) == 2

In [None]:
workerimage = next(
    (
        image
        for image in dockerfile_list
        if not image.is_prebuilt and image.config.dockerfile == custom_dockerfile_str
    ),
    None,
)

assert isinstance(workerimage, SyftWorkerImage), str(workerimage)
workerimage

#### Add External Registry in Syft

In [None]:
external_registry = os.getenv("EXTERNAL_REGISTRY", registry)
external_registry_username = os.getenv("EXTERNAL_REGISTRY_USERNAME", None)
external_registry_password = os.getenv("EXTERNAL_REGISTRY_PASSWORD", None)

# external_registry = input()
# external_registry_username = getpass("Enter Registry Username")
# external_registry_password = getpass("Enter Registry Password")

In [None]:
datasite_client.api.services.image_registry.add(external_registry)

In [None]:
image_registry_list = datasite_client.api.services.image_registry.get_all()
image_registry_list

In [None]:
assert len(image_registry_list) == 1

In [None]:
local_registry = image_registry_list[0]
local_registry

In [None]:
assert isinstance(local_registry, SyftImageRegistry), str(local_registry)

In [None]:
registry_uid = local_registry.id

#### Build Image

In [None]:
docker_tag = "openmined/custom-worker:0.7.8"


datasite_client.api.services.worker_image.build(
    image_uid=workerimage.id,
    tag=docker_tag,
    registry_uid=registry_uid,
)

In [None]:
image_list = datasite_client.images.get_all()
image_list

In [None]:
# look our image up by id in the freshly fetched image list
# (evaluates to None when no image with that id is present)
workerimage = next((image for image in image_list if image.id == workerimage.id), None)
workerimage

In [None]:
# The image must be present in the refreshed list and carry build metadata.
assert workerimage is not None, str([image.__dict__ for image in image_list])
# `is_built` is a boolean flag, so check its truthiness: an `is not None`
# comparison would also pass for an image that was never built.
assert workerimage.is_built, str(workerimage)
assert workerimage.built_at is not None, str(workerimage)
assert workerimage.image_hash is not None, str(workerimage)

#### Push Image to Local Registry

In [None]:
datasite_client.api.services.worker_image.push(
    workerimage.id,
    username=external_registry_username,
    password=external_registry_password,
)

In [None]:
# Query the registry's HTTP API to confirm that the pushed image is there.
base_url = f"http://{workerimage.image_identifier.registry_host}"
expected_tag = workerimage.image_identifier.tag
pushed_repo = "openmined/custom-worker"
registry_timeout = 30  # seconds; without a timeout `requests` may block forever

# 1) the repository must be listed in the registry catalog
catalog_response = requests.get(f"{base_url}/v2/_catalog", timeout=registry_timeout)
catalog_response.raise_for_status()
repos = catalog_response.json()["repositories"]
assert (
    pushed_repo in repos
), f"'{pushed_repo}' not uploaded to local registry | {repos}"

# 2) the repository must expose the tag we built
tags_response = requests.get(
    f"{base_url}/v2/{pushed_repo}/tags/list", timeout=registry_timeout
)
tags_response.raise_for_status()
# a repository without tags may report `"tags": null`; normalise to a list so
# the assertion below fails with its message instead of raising TypeError
tags = tags_response.json().get("tags") or []
assert (
    expected_tag in tags
), f"'{pushed_repo}' with tag {expected_tag} not available | {tags}"

#### Create Worker Pool From Image

In [None]:
worker_pool_name = "custom-pool"
custom_pool_pod_annotations = {"test-custom-pool": "Test annotation for custom pool"}
custom_pool_pod_labels = {"test-custom-pool": "test_label_for_custom_pool"}
worker_pool_res = datasite_client.api.services.worker_pool.launch(
    pool_name=worker_pool_name,
    image_uid=workerimage.id,
    num_workers=3,
    registry_username=external_registry_username,
    registry_password=external_registry_password,
    pod_annotations=custom_pool_pod_annotations,
    pod_labels=custom_pool_pod_labels,
)

In [None]:
assert len(worker_pool_res) == 3

In [None]:
for status in worker_pool_res:
    assert status.error is None

In [None]:
worker_pool_list = datasite_client.worker_pools.get_all()
worker_pool_list

In [None]:
# check Label and Annotations for custom pool
custom_pool_statefulset = get_statefulset_by_pool_name(worker_pool_name)
assert custom_pool_statefulset is not None, "Custom pool statefulset not found"
custom_pool_pod_metadata = custom_pool_statefulset.spec.template.metadata

assert (
    "annotations" in custom_pool_pod_metadata
), "Annotations not found in custom pool pod metadata"
assert (
    "labels" in custom_pool_pod_metadata
), "Labels not found in custom pool pod metadata"

assert is_subset_dict(
    custom_pool_pod_annotations, custom_pool_pod_metadata.annotations
), "Annotations do not match in Custom pool pod metadata"
assert is_subset_dict(
    custom_pool_pod_labels, custom_pool_pod_metadata.labels
), "Labels do not match in Custom pool pod metadata"

In [None]:
assert len(worker_pool_list) == 2

In [None]:
# Pick the pool whose name matches the one we launched (None if absent)
worker_pool = next(
    filter(lambda pool: pool.name == worker_pool_name, worker_pool_list),
    None,
)

# On failure, dump every known pool to make the mismatch easy to spot
assert worker_pool is not None, str([pool.__dict__ for pool in worker_pool_list])
assert len(worker_pool.workers) == 3

In [None]:
# We can filter pools based on the image id upon which the pools were built
filtered_result = datasite_client.api.services.worker_pool.filter_by_image_id(
    image_uid=workerimage.id
)
filtered_result

In [None]:
second_worker = worker_pool.workers[1]
second_worker

#### Get Worker Logs

In [None]:
worker_logs = datasite_client.api.services.worker.logs(
    uid=second_worker.id,
)
worker_logs

In [None]:
assert isinstance(worker_logs, str)

In [None]:
worker_pool

### Syft function

In [None]:
data = np.array([1, 2, 3])
data_action_obj = sy.ActionObject.from_obj(data)

data_pointer = data_action_obj.send(datasite_client)
data_pointer

In [None]:
@sy.syft_function(
    input_policy=sy.ExactMatch(x=data_pointer),
    output_policy=sy.SingleExecutionExactOutput(),
    worker_pool_name=worker_pool_name,
)
def custom_worker_func(x):
    """Print the installed pydicom version and return ``{"y": x + 1}``.

    Args:
        x: Input value; the input policy pins it to ``data_pointer``.

    Returns:
        A dict with the single key ``"y"`` holding ``x + 1``.
    """
    # NOTE(review): pydicom is imported inside the body, presumably because the
    # function runs on the custom worker image that installs it — confirm.
    # third party
    import pydicom

    print(pydicom.__version__)
    return {"y": x + 1}

In [None]:
custom_worker_func

In [None]:
assert custom_worker_func.worker_pool_name == worker_pool.name

In [None]:
request = datasite_client.code.request_code_execution(custom_worker_func)
request

In [None]:
# Approve the request object returned by `request_code_execution` rather than
# assuming it is the last entry of the datasite's request list.
request.approve(approve_nested=True)

In [None]:
job = datasite_client.code.custom_worker_func(x=data_pointer, blocking=False)
job

In [None]:
worker_pool = datasite_client.worker_pools[worker_pool_name]
worker_pool

In [None]:
job.wait()

In [None]:
assert job.status.value == "completed"

In [None]:
job_list = datasite_client.jobs.get_by_user_code_id(job.user_code_id)

In [None]:
job_refresh = job_list[0]
assert job_refresh.job_worker_id is not None, str([job.to_dict() for job in job_list])

In [None]:
# Validate the result received from the syft function
result = job.wait().get()
result_matches = result["y"] == data + 1
assert result_matches.all()

In [None]:
# Scale Down the workers
datasite_client.api.services.worker_pool.scale(number=1, pool_name=worker_pool_name)

In [None]:
assert len(datasite_client.worker_pools[worker_pool_name].worker_list) == 1

#### Worker Pool and Image Creation Request/Approval

In [None]:
dockerfile_opendp = f"""
FROM {registry}/{repo}:{tag}

RUN uv pip install opendp
""".strip()

docker_config_opendp = sy.DockerWorkerConfig(dockerfile=dockerfile_opendp)

In [None]:
submit_result = None
submit_result = datasite_client.api.services.worker_image.submit(
    worker_config=docker_config_opendp
)
submit_result

In [None]:
assert isinstance(submit_result, sy.SyftSuccess), str(submit_result)

In [None]:
_images = datasite_client.images

In [None]:
workerimage_opendp = next(
    (im for im in _images if im.config == docker_config_opendp),
    None,
)
assert workerimage_opendp is not None, str([im.__dict__ for im in _images])

##### Build image first then create pool

In [None]:
docker_tag_opendp = "openmined/custom-worker-opendp:latest"

docker_build_result = datasite_client.api.services.worker_image.build(
    image_uid=workerimage_opendp.id,
    tag=docker_tag_opendp,
    registry_uid=registry_uid,
)

docker_build_result

In [None]:
assert isinstance(docker_build_result, sy.SyftSuccess), str(docker_build_result)

In [None]:
_images = datasite_client.images

In [None]:
# Look the OpenDP image up by id in the refreshed image list
workerimage_opendp = next(
    (image for image in _images if image.id == workerimage_opendp.id),
    None,
)
assert workerimage_opendp is not None, str([image.__dict__ for image in _images])
# `is_built` is a boolean flag, so check its truthiness: an `is not None`
# comparison would also pass for an image that was never built.
assert workerimage_opendp.is_built, str(workerimage_opendp.__dict__)
assert workerimage_opendp.built_at is not None, str(workerimage_opendp.__dict__)
assert workerimage_opendp.image_hash is not None, str(workerimage_opendp.__dict__)

workerimage_opendp

In [None]:
# Push OpenDP Image to registry

datasite_client.api.services.worker_image.push(
    workerimage_opendp.id,
    username=external_registry_username,
    password=external_registry_password,
)

In [None]:
pool_name_opendp = "opendp-pool"
opendp_pod_annotations = {"test-opendp-pool": "Test annotation for opendp pool"}
opendp_pod_labels = {"test-opendp-pool": "test_label_for_opendp_pool"}
pool_create_request = datasite_client.api.services.worker_pool.pool_creation_request(
    pool_name=pool_name_opendp,
    num_workers=3,
    image_uid=workerimage_opendp.id,
    pod_annotations=opendp_pod_annotations,
    pod_labels=opendp_pod_labels,
)
pool_create_request

In [None]:
assert len(pool_create_request.changes) == 1

In [None]:
# get the pending request and approve it
req_result = pool_create_request.approve(
    registry_username=external_registry_username,
    registry_password=external_registry_password,
)
req_result

In [None]:
assert isinstance(req_result, sy.SyftSuccess), str(req_result)

In [None]:
pool_opendp = datasite_client.worker_pools[pool_name_opendp]
assert not isinstance(pool_opendp, sy.SyftError), str(pool_opendp)
assert len(pool_opendp.worker_list) == 3

In [None]:
worker_pool_list = datasite_client.worker_pools.get_all()
assert len(worker_pool_list) == 3

In [None]:
# check annotations and labels for open dp pool
opendp_pool_statefulset = get_statefulset_by_pool_name(pool_name_opendp)
assert opendp_pool_statefulset is not None, "Open DP pool statefulset not found"
opendp_pool_pod_metadata = opendp_pool_statefulset.spec.template.metadata


assert (
    "annotations" in opendp_pool_pod_metadata
), "Annotations not found in opendp pool pod metadata"
assert (
    "labels" in opendp_pool_pod_metadata
), "Labels not found in opendp pool pod metadata"


assert is_subset_dict(
    opendp_pod_annotations, opendp_pool_pod_metadata.annotations
), "Annotations do not match in opendp pool pod metadata"
assert is_subset_dict(
    opendp_pod_labels, opendp_pool_pod_metadata.labels
), "Labels do not match in opendp pool pod metadata"

In [None]:
# Scale Down the workers
datasite_client.api.services.worker_pool.scale(number=1, pool_name=pool_name_opendp)

In [None]:
assert len(datasite_client.worker_pools[pool_name_opendp].worker_list) == 1

##### Request to build the image and create the pool at the same time

In [None]:
dockerfile_recordlinkage = f"""
FROM {registry}/{repo}:{tag}

RUN uv pip install recordlinkage
""".strip()

docker_config_recordlinkage = sy.DockerWorkerConfig(dockerfile=dockerfile_recordlinkage)

docker_tag_recordlinkage = "openmined/custom-worker-recordlinkage:latest"

In [None]:
pool_name_recordlinkage = "recordlinkage-pool"
recordlinkage_pod_annotations = {
    "test-recordlinkage-pool": "Test annotation for recordlinkage pool"
}
recordlinkage_pod_labels = {
    "test-recordlinkage-pool": "test_label_for_recordlinkage_pool"
}
pool_image_create_request = datasite_client.api.services.worker_pool.create_image_and_pool_request(
    pool_name=pool_name_recordlinkage,
    num_workers=2,
    tag=docker_tag_recordlinkage,
    config=docker_config_recordlinkage,
    registry_uid=registry_uid,
    reason="I want to do some more cool data science with PySyft and RecordLinkage!",
    pod_annotations=recordlinkage_pod_annotations,
    pod_labels=recordlinkage_pod_labels,
)
pool_image_create_request

In [None]:
assert len(pool_image_create_request.changes) == 2
assert pool_image_create_request.changes[0].config == docker_config_recordlinkage
assert pool_image_create_request.changes[1].num_workers == 2
assert pool_image_create_request.changes[1].pool_name == pool_name_recordlinkage

In [None]:
req_result = pool_image_create_request.approve(
    registry_username=external_registry_username,
    registry_password=external_registry_password,
)
req_result

In [None]:
assert isinstance(req_result, sy.SyftSuccess), str(req_result)

In [None]:
_requests = datasite_client.requests

In [None]:
# Look the request up again by id in the fetched request list so the status
# checked below comes from that list rather than from the local object.
pool_image_create_request = next(
    (req for req in _requests if req.id == pool_image_create_request.id),
    None,
)
assert pool_image_create_request is not None, str([req.__dict__ for req in _requests])
# NOTE(review): 2 is presumably the numeric value of the "approved" request
# status — confirm against the request status enum.
assert pool_image_create_request.status.value == 2, str(pool_image_create_request)

In [None]:
# check annotations and labels for recordlinkage pool
recordlinkage_pool_statefulset = get_statefulset_by_pool_name(pool_name_recordlinkage)
assert (
    recordlinkage_pool_statefulset is not None
), "RecordLinkage pool statefulset not found"
recordlinkage_pool_pod_metadata = recordlinkage_pool_statefulset.spec.template.metadata

# presence checks first, so a missing section is reported as "not found"
# rather than surfacing as an attribute error in the comparisons below
assert (
    "annotations" in recordlinkage_pool_pod_metadata
), "Annotations not found in recordlinkage pool pod metadata"
assert (
    "labels" in recordlinkage_pool_pod_metadata
), "Labels not found in recordlinkage pool pod metadata"

# the requested annotations/labels must be a subset of what the pod template has
assert is_subset_dict(
    recordlinkage_pod_annotations, recordlinkage_pool_pod_metadata.annotations
), "Annotations do not match in recordlinkage pool pod metadata"
assert is_subset_dict(
    recordlinkage_pod_labels, recordlinkage_pool_pod_metadata.labels
), "Labels do not match in recordlinkage pool pod metadata"

In [None]:
datasite_client.images

In [None]:
# Fetch the image list once so the check and its failure message describe the
# same, current set of images.
current_images = datasite_client.images.get_all()
image_exists = any(
    im.image_identifier
    and im.image_identifier.repo_with_tag == docker_tag_recordlinkage
    for im in current_images
)

assert image_exists, str([im.__dict__ for im in current_images])

In [None]:
assert datasite_client.worker_pools[pool_name_recordlinkage]
assert len(datasite_client.worker_pools[pool_name_recordlinkage].worker_list) == 2

In [None]:
# Scale down the workers
datasite_client.api.services.worker_pool.scale(
    number=1, pool_name=pool_name_recordlinkage
)

In [None]:
assert len(datasite_client.worker_pools[pool_name_recordlinkage].worker_list) == 1