In [20]:
# stdlib
import os
from pathlib import Path
import time

# third party
import docker
import numpy as np

# syft absolute
import syft as sy

# syft absolute
from syft.service.worker.image_registry import SyftImageRegistry
from syft.service.worker.worker_image import SyftWorkerImage

# Record which syft release produced the outputs stored in this notebook.
installed_syft_version = sy.__version__
print(installed_syft_version)

0.9.1-beta.4


In [2]:

"""
1. Launch cluster:
CLUSTER_NAME=test-datasite-1 CLUSTER_HTTP_PORT=8080 tox -e dev.k8s.start
CLUSTER_NAME=test-datasite-1 CLUSTER_HTTP_PORT=8080 tox -e dev.k8s.deploy
"""

# Environment settings applied before the datasite is launched; the port value
# matches CLUSTER_HTTP_PORT in the cluster launch commands above.
os.environ.update(
    {
        "ORCHESTRA_DEPLOYMENT_TYPE": "remote",
        "DEV_MODE": "True",
        "SERVER_PORT": "8080",
    }
)

In [3]:
# Launch options gathered in one mapping so they are easy to inspect and tweak.
launch_options = {
    "name": "test-datasite-1",
    "dev_mode": True,
    "reset": True,
    "create_producer": True,
    "n_consumers": 2,
    "port": "auto",
}
datasite = sy.orchestra.launch(**launch_options)

In [6]:
# Root credentials fall back to the values this notebook has always used, but
# can be overridden through the environment so real credentials never have to
# be written into the notebook.
# NOTE(review): the environment variable names are assumed to match the ones
# the server itself is configured with — confirm.
root_email = os.getenv("DEFAULT_ROOT_EMAIL", "info@openmined.org")
root_password = os.getenv("DEFAULT_ROOT_PASSWORD", "changethis")
datasite_client = datasite.login(email=root_email, password=root_password)

Logged into <syft-dev-server: High side Datasite> as <info@openmined.org>


## Set up custom pool

In [21]:
registry_url = "localhost:5800"

# Register the registry URL with the datasite, then keep the first listed
# registry for the build step later on.
image_registry_api = datasite_client.api.services.image_registry
image_registry_api.add(registry_url)
registry = image_registry_api[0]

In [7]:
syft_base_worker_tag = "0.9.0-beta.5"

# Two-instruction Dockerfile: start from the Syft backend image at the tag
# above, then install recordlinkage on top of it.
dockerfile_lines = [
    f"FROM openmined/syft-backend:{syft_base_worker_tag}",
    "",
    "RUN uv pip install recordlinkage",
]
custom_dockerfile_str = "\n".join(dockerfile_lines)

# Tag applied to the image when it is built, and the worker config wrapping
# the Dockerfile text defined above.
docker_tag = "openmined/custom-worker-recordlinkage:latest"
docker_config = sy.DockerWorkerConfig(dockerfile=custom_dockerfile_str)

In [11]:
# Submit the Docker worker config to the datasite and show the response.
worker_image_api = datasite_client.api.services.worker_image
submit_result = worker_image_api.submit(worker_config=docker_config)
submit_result

In [26]:
# Pick the submitted image by its Dockerfile contents instead of by list
# position, so the lookup does not depend on the order `get_all()` returns.
# `getattr` is used because not every image config is guaranteed to expose a
# `dockerfile` attribute.
workerimage = next(
    (
        image
        for image in datasite_client.images.get_all()
        if getattr(image.config, "dockerfile", None) == custom_dockerfile_str
    ),
    None,
)
assert workerimage is not None, "Submitted custom worker image was not found"
workerimage

```python
class SyftWorkerImage:
  id: str = ec68c829a44e4182b213c6f1f84dd3f1
  image_identifier: str = None
  image_hash: str = None
  created_at: str = 2024-08-15 09:46:40
  built_at: str = None
  config: str = FROM openmined/syft-backend:0.9.0-beta.5

RUN uv pip install recordlinkage

```

In [28]:
# Build the submitted image under `docker_tag`, associated with the registry
# registered earlier, and show the build response.
build_options = {
    "image_uid": workerimage.id,
    "tag": docker_tag,
    "registry_uid": registry.id,
    "pull_image": True,
}
docker_build_result = datasite_client.api.services.worker_image.build(**build_options)
docker_build_result

In [30]:
# Re-fetch the same image by id so the post-build fields (identifier, hash,
# built_at) are refreshed without relying on its position in the listing.
built_image_id = workerimage.id
workerimage = next(
    (
        image
        for image in datasite_client.images.get_all()
        if image.id == built_image_id
    ),
    None,
)
assert workerimage is not None, "Built worker image was not found"
workerimage

```python
class SyftWorkerImage:
  id: str = ec68c829a44e4182b213c6f1f84dd3f1
  image_identifier: str = localhost:5800/openmined/custom-worker-recordlinkage:latest
  image_hash: str = "sha256:46c860184e97a6e50ba6bba344a7d71a836ae2347e6a438608e66abbd609e494"
  created_at: str = 2024-08-15 09:46:40
  built_at: str = 2024-08-15 10:02:55
  config: str = FROM openmined/syft-backend:0.9.0-beta.5

RUN uv pip install recordlinkage

```

In [31]:
pool_name = "my-pool"

# Launch a two-worker pool backed by the custom image and show the response.
pool_options = {
    "pool_name": pool_name,
    "image_uid": workerimage.id,
    "num_workers": 2,
}
worker_pool_res = datasite_client.api.services.worker_pool.launch(**pool_options)
worker_pool_res

## Run job on pool

In [None]:
# Private data: 100_000 identical rows of five integers. The mock is a single
# short 1-D array.
# NOTE(review): the mock values differ from the private data even though
# mock_is_real=True — confirm the flag is intended.
private_values = np.tile(np.array([15, 16, 17, 18, 19]), (100_000, 1))
mock_values = np.array([10, 11, 12, 13, 14])

numpy_asset = sy.Asset(
    name="numpy-data",
    mock=mock_values,
    data=private_values,
    mock_is_real=True,
)
dataset = sy.Dataset(name="my-dataset", description="abc", asset_list=[numpy_asset])

datasite_client.upload_dataset(dataset)

In [None]:
data = datasite_client.datasets[0].assets[0]


@sy.syft_function_single_use(data=data, worker_pool_name=pool_name)
def compute_mean(datasite, data) -> float:
    """Return the arithmetic mean over every value in ``data``.

    Args:
        datasite: Not used in the body.
            NOTE(review): presumably supplied by Syft at execution time — confirm.
        data: The asset bound by the decorator; assumed to expose a numpy-style
            ``.mean()`` (the uploaded asset is a numpy array) — confirm.

    Returns:
        The mean as a plain Python float, matching the declared return type.
    """
    print("Computing mean...")
    # Previously the raw input was returned, contradicting both the function
    # name and the ``-> float`` annotation.
    return float(data.mean())

In [None]:
# Submit `compute_mean` as a code execution request and show the response.
code_request = datasite_client.code.request_code_execution(compute_mean)
code_request

In [None]:
# Start the function as a non-blocking job, wait for it to finish, then
# render the finished job.
code_api = datasite_client.code
job = code_api.compute_mean(data=data, blocking=False)
job.wait()

display(job)

## Prepare migration

In [33]:
# Directory that receives the migration artifacts; defaults to the current
# working directory and can be redirected with MIGRATION_DATA_DIR.
migration_data_dir = Path(os.getenv("MIGRATION_DATA_DIR", "."))
# parents=True lets MIGRATION_DATA_DIR point at a nested directory that does
# not exist yet, which would otherwise raise FileNotFoundError.
migration_data_dir.mkdir(parents=True, exist_ok=True)

blob_path = migration_data_dir / "migration.blob"
yaml_path = migration_data_dir / "migration.yaml"

# Remove artifacts left by a previous run so the existence checks after saving
# cannot pass on stale files.
for stale_path in (blob_path, yaml_path):
    stale_path.unlink(missing_ok=True)

In [35]:
# Export the datasite's migration data, blobs included, and show its summary.
export_options = {"include_blobs": True}
migration_data = datasite_client.get_migration_data(**export_options)

migration_data

```python
class MigrationData:
  id: str = f62cbcadbfcb42069174dc71e4dfff09
  server_uid: str = 913be68f34d248349421d6760cd69811
  root_verify_key: str = d65da05ec0ae1d1a3d7544cb3d3eadc9c439ef46848f7723ec91545cb3613270
  num_objects: str = 44
  num_action_objects: str = 0
  includes_blobs: str = True

```

In [36]:
migration_data.save(blob_path, yaml_path=yaml_path)

# Both artifacts must be on disk before moving on.
for written_path in (blob_path, yaml_path):
    assert written_path.exists()

print(f"Saved migration data to {blob_path.resolve()}")

Saved migration data to /Users/eelco/dev/PySyft/notebooks/tutorials/version-upgrades/migration.blob


## Reset node, add migration data

In [38]:
# NOTE(review): the reset below is commented out, so running this cell does
# nothing. The assertion in the following cell failed on the recorded run
# ("This node has not been reset"), so the server presumably has to be reset by
# some other means (e.g. redeploying the cluster) before continuing — confirm
# the intended workflow and either restore or remove this block.
# datasite = sy.orchestra.launch(
#     name="test-datasite-1",
#     dev_mode=True,
#     reset=True,
#     create_producer=True,
#     n_consumers=2,
#     port="auto",
# )
# datasite_client = datasite.login(email="info@openmined.org", password="changethis")

In [41]:
# Exactly one image is expected to be listed once the server has been reset.
image_count = len(datasite_client.images.get_all())
assert image_count == 1, "This node has not been reset"

AssertionError: This node has not been reset

In [42]:
# Load the saved migration blob into the datasite and show the response.
load_result = datasite_client.load_migration_data(blob_path)
load_result