In [None]:
# Version range of syft this notebook is written for; it is checked further down with sy.requires(SYFT_VERSION)
SYFT_VERSION = ">=0.9,<1.0.0"
# Requirement string built from SYFT_VERSION, wrapped in literal double quotes: "syft>=0.9,<1.0.0"
package_string = f'"syft{SYFT_VERSION}"'

In [None]:
# stdlib
import os
import time

# third party
import docker
import numpy as np

# syft absolute
import syft as sy

sy.requires(SYFT_VERSION)

# syft absolute
from syft.service.worker.image_registry import SyftImageRegistry
from syft.service.worker.worker_image import SyftWorkerImage

# Local registry to test external registry


class LocalRegistryContainer:
    """Manage a throwaway ``registry:2`` container used as a local image registry.

    The container is always looked up by its fixed name (``local_registry``)
    through a Docker client created from the environment.
    """

    def __init__(self):
        self.name = "local_registry"
        self.client = docker.from_env()

    def start(self, host_port=5678):
        """Return the running registry container, starting a new one if needed.

        Args:
            host_port: Host port on which the container's ``5000/tcp`` is published.

        Returns:
            The already running container if there is one, otherwise the
            container that was just started.
        """
        existing = self._find()
        if existing is not None:
            if existing.status == "running":
                return existing
            # A stopped/exited container still owns the name, so the `run`
            # call below would fail with a name conflict. Drop the leftover.
            existing.remove(force=True)

        return self.client.containers.run(
            "registry:2",
            name=self.name,
            detach=True,
            ports={"5000/tcp": host_port},
            labels={"orgs.openmined.syft": "local-registry"},
        )

    def teardown(self):
        """Stop the registry container if it is running, then remove it.

        Does nothing when no container with the registry name exists.
        """
        existing = self._find()
        if existing is None:
            return
        if existing.status == "running":
            existing.stop()
        # force=True also covers leftovers that are not cleanly stopped.
        existing.remove(force=True)

    def get(self):
        """Return the registry container if it is running, otherwise ``None``."""
        existing = self._find()
        if existing is not None and existing.status == "running":
            return existing
        return None

    def _find(self):
        """Look the container up by name regardless of its status.

        Returns ``None`` when Docker reports that no such container exists.
        """
        try:
            return self.client.containers.get(self.name)
        except docker.errors.NotFound:
            return None


local_registry_container = LocalRegistryContainer()

In [None]:
# Uncomment this to run the whole docker based custom workers
# os.environ["ORCHESTRA_DEPLOYMENT_TYPE"] = "container_stack"
# os.environ["DEV_MODE"] = "True"


# True only when the notebook is pointed at the "container_stack" deployment;
# the docker-specific cells below are guarded by this flag.
running_as_container = os.environ.get("ORCHESTRA_DEPLOYMENT_TYPE") == "container_stack"

In [None]:
datasite = sy.orchestra.launch(
    name="test-datasite-1",
    dev_mode=True,
    create_producer=True,
    reset=True,
    port=8081,
)

In [None]:
datasite_client = datasite.login(email="info@openmined.org", password="changethis")

We should see a default worker pool

In [None]:
datasite_client.worker_pools

In [None]:
# DEV_MODE is optional: a missing variable means "off" instead of a KeyError,
# and the value is parsed explicitly because bool("False") would be truthy.
dev_mode_enabled = os.environ.get("DEV_MODE", "").strip().lower() in (
    "1",
    "true",
    "yes",
    "on",
)

# Use the "local-dev" tag only for a dev-mode container stack; otherwise base
# the custom worker on the backend image matching the installed syft version.
# (No hard-coded tag override afterwards: it would make this choice dead code.)
syft_base_worker_tag = (
    "local-dev" if (dev_mode_enabled and running_as_container) else sy.__version__
)
syft_base_worker_tag

#### Submit Dockerfile

In [None]:
opendp_dockerfile_str = f"""
FROM openmined/syft-backend:{syft_base_worker_tag}

RUN uv pip install opendp

""".strip()

docker_tag = "openmined/custom-worker-opendp:1.0.0"

In [None]:
docker_config = sy.DockerWorkerConfig(dockerfile=opendp_dockerfile_str)

In [None]:
# test image build locally
test_build_res = docker_config.test_image_build(tag=docker_tag)
test_build_res

In [None]:
assert isinstance(test_build_res, sy.SyftSuccess), str(test_build_res)

In [None]:
assert docker_config.dockerfile == opendp_dockerfile_str

In [None]:
submit_result = datasite_client.api.services.worker_image.submit(
    worker_config=docker_config
)

In [None]:
submit_result

In [None]:
assert isinstance(submit_result, sy.SyftSuccess), str(submit_result)

In [None]:
dockerfile_list = datasite_client.images.get_all()
dockerfile_list

In [None]:
assert len(datasite_client.images.get_all()) == 2

In [None]:
# Pick the first submitted (not prebuilt) image whose Dockerfile is the one
# defined above.
workerimage: SyftWorkerImage = None
for image in dockerfile_list:
    if image.is_prebuilt:
        continue
    if image.config.dockerfile == opendp_dockerfile_str:
        workerimage = image
        break

assert isinstance(workerimage, SyftWorkerImage), str(workerimage)

In [None]:
workerimage

#### Add Local Registry in Syft

In [None]:
registry_add_result = datasite_client.api.services.image_registry.add("localhost:5678")
registry_add_result

In [None]:
assert isinstance(registry_add_result, sy.SyftSuccess), str(registry_add_result)

In [None]:
images = datasite_client.api.services.image_registry.get_all()
assert len(images) == 1
images

In [None]:
local_registry = images[0]
local_registry

In [None]:
assert isinstance(local_registry, SyftImageRegistry), str(local_registry)

#### Build Image

In [None]:
# Pull the base image for every tag except "local-dev".
# NOTE(review): "local-dev" is presumably an image that only exists locally — confirm.
pull = syft_base_worker_tag != "local-dev"
pull

In [None]:
# The image is built against the local registry for every deployment type.
registry_uid = local_registry.id

docker_build_result = datasite_client.api.services.worker_image.build(
    image_uid=workerimage.id,
    tag=docker_tag,
    registry_uid=registry_uid,
    pull_image=pull,
)
docker_build_result

In [None]:
workerimage.config.dockerfile

In [None]:
assert isinstance(docker_build_result, sy.SyftSuccess), str(docker_build_result)

In [None]:
image_list = datasite_client.images.get_all()
image_list

In [None]:
# Re-bind `workerimage` to the matching entry of the freshly fetched list.
for image in image_list:
    if image.id == workerimage.id:
        workerimage = image

if running_as_container:
    # we can also index with string using the repo_with_tag format
    image_list[workerimage.built_image_tag]
    assert image_list[workerimage.built_image_tag] == workerimage

workerimage

In [None]:
def get_image_hash(tag) -> str:
    client = docker.from_env()
    try:
        image = client.images.get(tag)
        return image.id
    except docker.errors.ImageNotFound:
        return None

In [None]:
if running_as_container:
    assert workerimage.image_hash == get_image_hash(
        workerimage.built_image_tag
    ), "Worker Image image_hash does not match with built image hash"

#### Push Image to Local Registry

In [None]:
push_result = None
if running_as_container:
    local_registry_container.start()
    # Give the freshly started registry a moment before pushing to it.
    # Uses the `time` module imported at the top instead of a cell-local import.
    time.sleep(5)

    push_result = datasite_client.api.services.worker_image.push(workerimage.id)
    assert isinstance(push_result, sy.SyftSuccess), str(push_result)

In [None]:
push_result

In [None]:
if running_as_container:
    # third party
    import requests

    base_url = f"http://{workerimage.image_identifier.registry_host}"
    expected_tag = workerimage.image_identifier.tag
    search_tag = "openmined/custom-worker-opendp"

    # Every request gets a timeout so an unreachable registry fails the cell
    # instead of hanging it, and HTTP errors are raised before parsing JSON.
    catalog_response = requests.get(f"{base_url}/v2/_catalog", timeout=10)
    catalog_response.raise_for_status()
    repos = catalog_response.json()["repositories"]

    # Check the repository first: asking for the tags of a repository that
    # was never pushed would fail before this message could be shown.
    assert (
        search_tag in repos
    ), f"'{search_tag}' not uploaded to local registry | {repos}"

    tags_response = requests.get(
        f"{base_url}/v2/{search_tag}/tags/list", timeout=10
    )
    tags_response.raise_for_status()
    tags = tags_response.json()["tags"]

    print(tags)

    assert (
        expected_tag in tags
    ), f"'{search_tag}' with tag {expected_tag} not available | {tags}"

#### Delete locally built image to force pull from local registry

This should make the subsequent `worker_pool.launch` pull the image from the registry at `localhost:5678`.

In [None]:
# stdlib
from time import sleep


def remove_local_image(tag):
    """Delete the local Docker image ``tag``; a missing image is not an error."""
    docker_client = docker.from_env()
    try:
        docker_client.images.remove(tag)
    except docker.errors.ImageNotFound:
        # Nothing to delete: Docker does not know this image.
        return None


if running_as_container:
    remove_local_image(workerimage.built_image_tag)

#### Create Worker Pool From Image

In [None]:
worker_pool_name = "opendp-pool"
worker_pool_res = datasite_client.api.services.worker_pool.launch(
    pool_name=worker_pool_name,
    image_uid=workerimage.id,
    num_workers=2,
)

In [None]:
assert len(worker_pool_res) == 2

In [None]:
for status in worker_pool_res:
    assert status.error is None
    if running_as_container:
        assert status.worker.image.image_hash == get_image_hash(
            workerimage.built_image_tag
        ), "Worker Pool Image image_hash does not match with built image hash"

In [None]:
worker_pool_list = datasite_client.worker_pools
worker_pool_list

In [None]:
assert len(datasite_client.worker_pools.get_all()) == 2
worker_pool = None
for pool in worker_pool_list:
    if pool.name == worker_pool_name:
        worker_pool = pool
        break
assert worker_pool is not None
assert len(worker_pool.workers) == 2

In [None]:
# We can filter pools based on the image id upon which the pools were built
datasite_client.api.services.worker_pool.filter_by_image_id(image_uid=workerimage.id)

In [None]:
# Select the second worker of the pool; its logs are fetched and it is deleted in the cells below
second_worker = worker_pool.workers[1]

In [None]:
second_worker

#### Get Worker Logs

In [None]:
raw_worker_logs = datasite_client.api.services.worker.logs(
    uid=second_worker.id,
    raw=True,
)
raw_worker_logs

In [None]:
assert isinstance(raw_worker_logs, bytes)

In [None]:
worker_logs = datasite_client.api.services.worker.logs(
    uid=second_worker.id,
)
worker_logs

In [None]:
assert isinstance(worker_logs, str)

#### Delete Worker from Pool

In [None]:
worker_delete_res = datasite_client.api.services.worker.delete(
    uid=second_worker.id, force=True
)

In [None]:
worker_delete_res

In [None]:
assert isinstance(worker_delete_res, sy.SyftSuccess), str(worker_delete_res)

In [None]:
# Refetch the worker pool by name, then make sure that exactly one worker
# is left and that it is not the deleted one
for pool in datasite_client.api.services.worker_pool.get_all():
    if pool.name != worker_pool_name:
        continue
    worker_pool = pool

assert len(worker_pool.workers) == 1
for worker in worker_pool.workers:
    assert second_worker.id != worker.id

In [None]:
worker_pool

### Syft function

In [None]:
data = np.array([1, 2, 3])
data_action_obj = sy.ActionObject.from_obj(data)

data_pointer = data_action_obj.send(datasite_client)
data_pointer

In [None]:
# Syft function that targets the pool named `worker_pool_name`, declared with an
# ExactMatch input policy on `data_pointer` and a SingleExecutionExactOutput
# output policy.
@sy.syft_function(
    input_policy=sy.ExactMatch(x=data_pointer),
    output_policy=sy.SingleExecutionExactOutput(),
    worker_pool_name=worker_pool_name,
)
def custom_worker_func(x):
    # Return the input incremented by one under the key "y"

    return {"y": x + 1}

In [None]:
custom_worker_func

In [None]:
assert custom_worker_func.worker_pool_name == worker_pool.name

In [None]:
request = datasite_client.code.request_code_execution(custom_worker_func)
request

In [None]:
datasite_client.requests[-1].approve(approve_nested=True)

In [None]:
job = datasite_client.code.custom_worker_func(x=data_pointer, blocking=False)
job

In [None]:
worker_pool = datasite_client.worker_pools[worker_pool_name]
worker_pool

In [None]:
job.wait()

In [None]:
assert job.status.value == "completed"

In [None]:
job = datasite_client.jobs[-1]
job

In [None]:
job.job_worker_id

In [None]:
# Disabling it due to Race Condition Error
# assert job.job_worker_id is not None

In [None]:
# Sleeping so that consumer state is updated
time.sleep(5)

In [None]:
# Once the work is done by the worker, its state is returned to idle again.
# NOTE(review): if job.job_worker_id is None (see the disabled assertion above),
# no worker matches, the flag stays False and the final assertion fails.
consuming_worker_is_now_idle = False
for worker in datasite_client.worker_pools[worker_pool_name].workers:
    if worker.id == job.job_worker_id:
        # Compared case-insensitively against "idle"
        consuming_worker_is_now_idle = worker.consumer_state.value.lower() == "idle"

assert consuming_worker_is_now_idle is True

In [None]:
# Validate the result received from the syft function
result = job.wait().get()
result_matches = result["y"] == data + 1
assert result_matches.all()

#### Worker Image

In [None]:
# delete the remaining workers
for worker in worker_pool.workers:
    res = datasite_client.api.services.worker.delete(
        uid=worker.id,
    )
    assert isinstance(res, sy.SyftSuccess), str(res)

In [None]:
delete_res = datasite_client.api.services.worker_image.remove(workerimage.id)
delete_res

In [None]:
# Since the containers are deleted, we should be able to delete the image
assert isinstance(delete_res, sy.SyftSuccess), str(delete_res)

In [None]:
if running_as_container:
    local_registry_container.teardown()

#### Worker Pool and Image Creation Request/Approval

In [None]:
custom_dockerfile_str_2 = f"""
FROM openmined/syft-backend:{syft_base_worker_tag}

RUN uv pip install opendp
""".strip()

docker_config_2 = sy.DockerWorkerConfig(dockerfile=custom_dockerfile_str_2)

In [None]:
submit_result = datasite_client.api.services.worker_image.submit(
    worker_config=docker_config_2
)
submit_result

In [None]:
datasite_client.images

In [None]:
# Get the image record whose config is docker_config_2 (submitted, not built yet)
workerimage_2 = None
for im in datasite_client.images:
    if im.config == docker_config_2:
        workerimage_2 = im

# Fail here with a clear message rather than with an AttributeError on
# `workerimage_2.id` in the cells that follow.
assert workerimage_2 is not None, "No worker image found for docker_config_2"

##### Build image first then create pool

In [None]:
docker_tag_2 = "openmined/custom-worker-opendp:latest"

docker_build_result = datasite_client.api.services.worker_image.build(
    image_uid=workerimage_2.id,
    tag=docker_tag_2,
    pull_image=pull,
)
docker_build_result

In [None]:
opendp_pool_name = "second-opendp-pool"
pool_create_request = datasite_client.api.services.worker_pool.pool_creation_request(
    pool_name=opendp_pool_name, num_workers=2, image_uid=workerimage_2.id
)
pool_create_request

In [None]:
assert len(pool_create_request.changes) == 1

In [None]:
# get the pending request and approve it
req_result = pool_create_request.approve()
req_result

In [None]:
assert isinstance(req_result, sy.SyftSuccess), str(req_result)

In [None]:
datasite_client.worker_pools[opendp_pool_name]

In [None]:
assert datasite_client.worker_pools[opendp_pool_name]
assert len(datasite_client.worker_pools[opendp_pool_name].workers) == 2

In [None]:
# default, opendp-pool, second-opendp-pool
assert len(datasite_client.worker_pools.get_all()) == 3

Remove all `second-opendp-pool` workers

In [None]:
# Address the pool through `opendp_pool_name` (the name it was created with)
# rather than repeating the string literal.
for worker in datasite_client.worker_pools[opendp_pool_name].workers:
    res = datasite_client.api.services.worker.delete(uid=worker.id, force=True)
    assert isinstance(res, sy.SyftSuccess), str(res)

# The pool is fetched again here, so this checks the state after the deletions.
assert len(datasite_client.worker_pools[opendp_pool_name].workers) == 0

Remove the `second-opendp-pool`'s worker image

In [None]:
delete_res = datasite_client.api.services.worker_image.remove(workerimage_2.id)
delete_res

In [None]:
# Since the containers are deleted, we should be able to delete the image
assert isinstance(delete_res, sy.SyftSuccess), str(delete_res)

##### Request to build the image and create the pool at the same time

In [None]:
custom_dockerfile_str_3 = f"""
FROM openmined/syft-backend:{syft_base_worker_tag}

RUN uv pip install recordlinkage
""".strip()

docker_config_3 = sy.DockerWorkerConfig(dockerfile=custom_dockerfile_str_3)

docker_tag_3 = "openmined/custom-worker-recordlinkage:latest"

In [None]:
recordlinkage_pool_name = "recordlinkage-pool"
pool_image_create_request = (
    datasite_client.api.services.worker_pool.create_image_and_pool_request(
        pool_name=recordlinkage_pool_name,
        num_workers=2,
        tag=docker_tag_3,
        config=docker_config_3,
        reason="I want to do some more cool data science with PySyft and recordlinkage",
        pull_image=pull,
    )
)
pool_image_create_request

In [None]:
assert len(pool_image_create_request.changes) == 2
assert pool_image_create_request.changes[0].config == docker_config_3
assert pool_image_create_request.changes[1].num_workers == 2
assert pool_image_create_request.changes[1].pool_name == recordlinkage_pool_name

In [None]:
# get the pending request and approve it
req_result = pool_image_create_request.approve()
req_result

In [None]:
assert isinstance(req_result, sy.SyftSuccess), str(req_result)

In [None]:
# Get updated request object and status
for req in datasite_client.requests:
    if req.id == pool_image_create_request.id:
        pool_image_create_request = req

# NOTE(review): 2 is presumably the "approved" request status value — confirm against syft's request status enum
assert pool_image_create_request.status.value == 2

In [None]:
# Look for an image whose identifier matches docker_tag_3; entries without
# an image identifier are skipped.
recordlinkage_image = None
image_exists = False

for im in datasite_client.images.get_all():
    if not im.image_identifier:
        continue
    if im.image_identifier.repo_with_tag == docker_tag_3:
        image_exists = True
        recordlinkage_image = im

assert image_exists
assert recordlinkage_image
recordlinkage_image

In [None]:
recordlinkage_pool = datasite_client.worker_pools[recordlinkage_pool_name]

assert recordlinkage_pool
assert len(recordlinkage_pool.workers) == 2

Cleanup `recordlinkage-pool` workers

In [None]:
for worker in recordlinkage_pool.workers:
    res = datasite_client.api.services.worker.delete(uid=worker.id, force=True)
    assert isinstance(res, sy.SyftSuccess), str(res)

Cleanup `recordlinkage-pool`'s image

In [None]:
delete_res = datasite_client.api.services.worker_image.remove(recordlinkage_image.id)
# Check the removal like the earlier image cleanups do, instead of only
# displaying the result.
assert isinstance(delete_res, sy.SyftSuccess), str(delete_res)
delete_res

In [None]:
datasite.land()