In [None]:
# third party
from recordlinkage.datasets import load_febrl4

# syft absolute
import syft as sy
from syft.abstract_node import NodeType
from syft.service.network.node_peer import NodePeer
from syft.service.network.routes import HTTPNodeRoute
from syft.service.project.project import check_route_reachability
from syft.service.response import SyftSuccess

# Ports used when launching the two domain nodes and the enclave node below;
# CANADA_ENCLAVE_PORT is also used to build the route that registers the enclave.
CANADA_DOMAIN_PORT = 9081
ITALY_DOMAIN_PORT = 9082
CANADA_ENCLAVE_PORT = 9083

# Launch nodes

We will begin by launching two domain nodes and an enclave node.

In [None]:
# Keyword arguments shared by all three launches (dev_mode and reset enabled).
shared_launch_kwargs = {"dev_mode": True, "reset": True}

# Two domain nodes, one per country.
canada_node = sy.orchestra.launch(
    name="canada-domain",
    port=CANADA_DOMAIN_PORT,
    **shared_launch_kwargs,
)
italy_node = sy.orchestra.launch(
    name="italy-domain",
    port=ITALY_DOMAIN_PORT,
    **shared_launch_kwargs,
)

# The enclave is launched the same way but with an explicit ENCLAVE node type.
canada_enclave = sy.orchestra.launch(
    name="canada-enclave",
    node_type=NodeType.ENCLAVE,
    port=CANADA_ENCLAVE_PORT,
    **shared_launch_kwargs,
)

In [None]:
# Log in to each domain node with the data-owner credentials (Canada first,
# then Italy).
do_canada_client, do_italy_client = [
    domain_node.login(email="info@openmined.org", password="changethis")
    for domain_node in (canada_node, italy_node)
]

# Both clients must report that they are connected to a DOMAIN node.
for do_client in (do_canada_client, do_italy_client):
    assert do_client.metadata.node_type == NodeType.DOMAIN

# Upload datasets to both domains

In [None]:
# Using public datasets from Freely Extensible Biomedical Record Linkage (Febrl) project
# load_febrl4() is unpacked into two record sets: the first is used as Canada's
# census data and the second as Italy's.
canada_census_data, italy_census_data = load_febrl4()

In [None]:
# Number of leading rows of each census table kept as the private asset data;
# every remaining row is published as the asset's mock.
PRIVATE_ROW_COUNT = 2500

# Upload one dataset per domain. The loop variable gets its own name
# (`census_table`) so it is never rebound to the sy.Dataset built from it.
for census_table, client, country in zip(
    [canada_census_data, italy_census_data],
    [do_canada_client, do_italy_client],
    ["Canada", "Italy"],
):
    private_data = census_table[:PRIVATE_ROW_COUNT]
    mock_data = census_table[PRIVATE_ROW_COUNT:]
    # NOTE(review): the mock is a slice of the real table and is flagged with
    # mock_is_real=True -- confirm this is acceptable outside a demo setting.
    census_dataset = sy.Dataset(
        name=f"{country} - FEBrl Census Data",
        description="abc",
        asset_list=[
            sy.Asset(
                name="census_data",
                mock=mock_data,
                data=private_data,
                shape=private_data.shape,
                mock_is_real=True,
            )
        ],
    )
    # Fail at the point of upload rather than in a later cell, mirroring the
    # SyftSuccess check performed on `register` further down.
    upload_result = client.upload_dataset(census_dataset)
    assert isinstance(upload_result, SyftSuccess), upload_result

In [None]:
# Each domain should now list exactly one dataset.
for do_client in (do_canada_client, do_italy_client):
    assert len(do_client.datasets.get_all()) == 1

# Create an account for the data scientist on both domains

In [None]:
# Account details for the data scientist; the same account is registered on
# both domains using the data-owner clients.
ds_account = {
    "name": "Sheldon",
    "email": "sheldon@caltech.edu",
    "password": "changethis",
    "password_verify": "changethis",
}

for do_client in (do_canada_client, do_italy_client):
    registration = do_client.register(**ds_account)
    # Registration must succeed on each domain before continuing.
    assert isinstance(registration, SyftSuccess)

# Register the enclave with the Canada domain

In [None]:
# HTTP route to the enclave node, which was launched on CANADA_ENCLAVE_PORT.
route = HTTPNodeRoute(host_or_ip="localhost", port=CANADA_ENCLAVE_PORT)
# Add the enclave to the Canada domain through the data-owner client; as the
# cell's last expression, the call's result is displayed by the notebook.
do_canada_client.enclaves.add(route=route)

In [None]:
# Exactly one enclave should be registered on the Canada domain.
assert len(do_canada_client.enclaves.get_all()) == 1
# Display the registered enclaves.
do_canada_client.enclaves.get_all()

## Log in to the data scientist (DS) accounts

In [None]:
# Log in to each domain with the data scientist account (sheldon@caltech.edu)
# that was registered on both domains earlier in the notebook.
ds_canada_client = canada_node.login(email="sheldon@caltech.edu", password="changethis")
ds_italy_client = italy_node.login(email="sheldon@caltech.edu", password="changethis")

## Create Association Requests

In [None]:
# Build a NodePeer from the data scientist's Canada client and display it.
canada_node_peer = NodePeer.from_client(ds_canada_client)
canada_node_peer

In [None]:
# Build a NodePeer from the data scientist's Italy client and display it.
italy_node_peer = NodePeer.from_client(ds_italy_client)
italy_node_peer

In [None]:
# Submit the Italy peer to the Canada domain's network service via add_peer,
# then display the response.
canada_conn_req = ds_canada_client.api.services.network.add_peer(italy_node_peer)
canada_conn_req

In [None]:
# Submit the Canada peer to the Italy domain's network service via add_peer,
# then display the response.
italy_conn_req = ds_italy_client.api.services.network.add_peer(canada_node_peer)
italy_conn_req

In [None]:
# As the Canada data owner, approve the last request in the list
# (presumably the peer request submitted just before -- confirm).
do_canada_client.requests[-1].approve()

In [None]:
# As the Italy data owner, approve the last request in the list
# (presumably the peer request submitted just before -- confirm).
do_italy_client.requests[-1].approve()

In [None]:
# Run the project module's route-reachability check over both data scientist
# clients (going by its name, it verifies the domains can reach each other).
check_route_reachability([ds_canada_client, ds_italy_client])

# Find datasets across multiple domains

In [None]:
# Take the first asset of the last dataset listed by each data scientist client.
# NOTE: these two names held the raw Febrl tables earlier in the notebook; from
# this point on they refer to the assets obtained through the clients.
canada_census_data = ds_canada_client.datasets[-1].assets[0]
italy_census_data = ds_italy_client.datasets[-1].assets[0]

In [None]:
# Find available enclaves: concatenate the enclave listings visible to the data
# scientist on each domain, then display the combined result.
all_enclaves = ds_canada_client.enclaves.get_all() + ds_italy_client.enclaves.get_all()
all_enclaves

In [None]:
# Pick the first enclave from the combined listing; it is used as the provider
# in the RunOnEnclave deployment policy of the function defined below.
enclave = all_enclaves[0]
enclave

# Create and submit a distributed project

In [None]:
# Code to perform the multi-party computation


@sy.syft_function(
    input_policy=sy.ExactMatch(
        canada_census_data=canada_census_data,
        italy_census_data=italy_census_data,
    ),
    output_policy=sy.SingleExecutionExactOutput(),
    deployment_policy=sy.RunOnEnclave(
        provider=enclave,
        # Optional settings left disabled here: image (sy.DockerWorkerConfig),
        # workers_num, worker_pool_name, timeout, result_persistence.
    ),
)
def compute_census_matches(canada_census_data, italy_census_data):
    """Count the record pairs across the two census tables judged to match.

    Candidate pairs are limited to records sharing the same ``given_name``.
    Each pair is then compared on six fields, and a pair counts as a match
    when its summed comparison scores exceed 3.

    Args:
        canada_census_data: Census records from the Canada domain.
        italy_census_data: Census records from the Italy domain.

    Returns:
        The number of matching record pairs.
    """
    # third party
    import recordlinkage

    # Blocking: only pair up records that share a given name.
    pair_indexer = recordlinkage.Index()
    pair_indexer.block("given_name")
    candidate_pairs = pair_indexer.index(canada_census_data, italy_census_data)

    # Field-by-field comparison, registered in a fixed order.
    comparer = recordlinkage.Compare()
    comparer.exact("given_name", "given_name", label="given_name")
    comparer.string(
        "surname",
        "surname",
        method="jarowinkler",
        threshold=0.85,
        label="surname",
    )
    for exact_field in ("date_of_birth", "suburb", "state"):
        comparer.exact(exact_field, exact_field, label=exact_field)
    comparer.string("address_1", "address_1", threshold=0.85, label="address_1")

    pair_features = comparer.compute(
        candidate_pairs, canada_census_data, italy_census_data
    )

    # Classification: keep the pairs whose total score is above 3.
    pair_scores = pair_features.sum(axis=1)
    matched_pairs = pair_features[pair_scores > 3]

    return len(matched_pairs)

In [None]:
# Check result of execution on mock data: call the function with each asset's
# `.mock` instead of the asset itself, then display the returned value.
mock_result = compute_census_matches(
    canada_census_data=canada_census_data.mock,
    italy_census_data=italy_census_data.mock,
)
mock_result

In [None]:
# Define a project whose members are the two data scientist clients, then
# display it. It is submitted separately with `.send()`.
new_project = sy.Project(
    name="Census Matching",
    description="Match census data between Canada and Italy",
    members=[ds_canada_client, ds_italy_client],
)
new_project

In [None]:
# Submit the project and keep the returned object for the code request below.
project = new_project.send()

In [None]:
# Display the object returned by `new_project.send()`.
project

In [None]:
# Create a code request for compute_census_matches on the project, passing both
# data scientist clients; the call's result is displayed as the cell output.
project.create_code_request(
    compute_census_matches, clients=[ds_canada_client, ds_italy_client]
)

In [None]:
# Each data owner should now see exactly one submitted code object.
for do_client in (do_canada_client, do_italy_client):
    assert len(do_client.code.get_all()) == 1

# Cleanup local domain servers

In [None]:
# Land every launched node whose deployment type is "python", in the same
# order they were launched: Canada domain, Italy domain, Canada enclave.
for launched_node in (canada_node, italy_node, canada_enclave):
    if launched_node.deployment_type.value == "python":
        launched_node.land()