# Introduction

- Previous: [00-do-setup-domain.ipynb](./00-do-setup-domain.ipynb)
- Next: [02-do-review-code.ipynb](./02-do-review-code.ipynb)

# Prerequisites
You have run the [00-do-setup-domain.ipynb](./00-do-setup-domain.ipynb) notebook and have a data scientist (DS) account on both `canada-domain` and `italy-domain`.

In [None]:
# third party

# syft absolute
import syft as sy

# Ports for the two local domain nodes; they are passed to
# `sy.orchestra.launch` when the nodes are started further down.
CANADA_DOMAIN_PORT = 9081
ITALY_DOMAIN_PORT = 9082

# Log in to the domain nodes as a data scientist

In [None]:
# Launch the two domain nodes that were set up in the previous notebook,
# each under its own name and port, with dev mode switched on.
canada_node = sy.orchestra.launch(
    name="canada-domain",
    port=CANADA_DOMAIN_PORT,
    dev_mode=True,
)
italy_node = sy.orchestra.launch(
    name="italy-domain",
    port=ITALY_DOMAIN_PORT,
    dev_mode=True,
)

In [None]:
# Log in to each domain node with the same data scientist credentials.
ds_canada_client = canada_node.login(
    email="sheldon@caltech.edu",
    password="changethis",
)
ds_italy_client = italy_node.login(
    email="sheldon@caltech.edu",
    password="changethis",
)

# Find datasets across multiple domains

In [None]:
# On each domain, take the last dataset listed and use its first asset.
canada_dataset = ds_canada_client.datasets[-1]
canada_census_data = canada_dataset.assets[0]

italy_dataset = ds_italy_client.datasets[-1]
italy_census_data = italy_dataset.assets[0]

# Find an available enclave

In [None]:
# Collect the enclaves reported by each domain, Canada first, then Italy.
canada_enclaves = ds_canada_client.enclaves.get_all()
italy_enclaves = ds_italy_client.enclaves.get_all()
all_enclaves = canada_enclaves + italy_enclaves
all_enclaves

In [None]:
# Use the first enclave in the combined list.
# NOTE(review): assumes at least one enclave was returned above; indexing an
# empty result would presumably fail here - confirm.
enclave = all_enclaves[0]
enclave

# Create and submit a distributed project

In [None]:
# Code to perform the multi-party computation


@sy.syft_function(
    input_policy=sy.ExactMatch(
        canada_census_data=canada_census_data,
        italy_census_data=italy_census_data,
    ),
    output_policy=sy.SingleExecutionExactOutput(),
    deployment_policy=sy.RunOnEnclave(
        provider=enclave,
        # Further options, left disabled in this walkthrough:
        # image=sy.DockerWorkerConfig(dockerfile=dockerfile_str),
        # workers_num=4,
        # worker_pool_name=worker_pool_name,
        # timeout=300,
        # result_persistence={"storage_path": "/data/enclave", "retention_policy": "30d"}
    ),
)
def compute_census_matches(canada_census_data, italy_census_data):
    """Count the candidate record pairs that link the two census tables.

    Candidate pairs are built by blocking on ``given_name``, six fields are
    compared per pair, and a pair is counted when its comparison scores sum
    to more than 3. Returns that count as an ``int``.
    """
    # third party
    import recordlinkage

    # Fields compared for strict equality between the surname and address checks
    exact_fields = ("date_of_birth", "suburb", "state")

    # Index step: block on `given_name` to build the candidate pairs
    pair_indexer = recordlinkage.Index()
    pair_indexer.block("given_name")
    candidate_pairs = pair_indexer.index(canada_census_data, italy_census_data)

    # Comparison step: one labelled feature per compared field
    comparer = recordlinkage.Compare()
    comparer.exact("given_name", "given_name", label="given_name")
    comparer.string(
        "surname",
        "surname",
        method="jarowinkler",
        threshold=0.85,
        label="surname",
    )
    for field in exact_fields:
        comparer.exact(field, field, label=field)
    comparer.string("address_1", "address_1", threshold=0.85, label="address_1")

    scores = comparer.compute(
        candidate_pairs,
        canada_census_data,
        italy_census_data,
    )

    # Classification step: keep the pairs whose summed score exceeds 3
    row_totals = scores.sum(axis=1)
    return len(scores[row_totals > 3])

In [None]:
# Dry run: call the function on the mock version of each asset and show
# the result.
canada_mock = canada_census_data.mock
italy_mock = italy_census_data.mock
mock_result = compute_census_matches(
    canada_census_data=canada_mock,
    italy_census_data=italy_mock,
)
mock_result

In [None]:
# Define the project with both logged-in clients as members, then display it.
# It is submitted with `send()` in a later cell.
new_project = sy.Project(
    name="Census Matching",
    description="Match census data between Canada and Italy",
    members=[ds_canada_client, ds_italy_client],
)
new_project

In [None]:
# Submit the project and keep the returned object for the following steps.
project = new_project.send()

In [None]:
# Display the project object returned by `send()`.
project

In [None]:
# Create a code request for `compute_census_matches` through the project,
# passing both data scientist clients.
project.create_code_request(
    compute_census_matches, clients=[ds_canada_client, ds_italy_client]
)

In [None]:
# TODO: Should the Data Scientist see all the requests initially when the
# object is not retrieved from a domain?
# For now, the locally held project object is expected to list no requests
# right after the code request has been created.
assert len(project.requests) == 0

# Cleanup local domain servers

In [None]:
# Land every node whose deployment type is "python", Canada first, then Italy.
for node in (canada_node, italy_node):
    if node.deployment_type.value == "python":
        node.land()