# Introduction

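This notebook takes the data scientist's side of a multi-domain workflow: it logs in to two domain nodes (`canada-domain` and `italy-domain`), picks a census asset on each, and creates and submits a distributed project whose code counts matching records across the two datasets.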

# Prerequisites
You have run the [00-do-domain-setup.ipynb](./00-do-domain-setup.ipynb) notebook and have a data scientist (DS) account on both `canada-domain` and `italy-domain`.

In [None]:
# third party
import pytest

# syft absolute
import syft as sy
from syft.service.response import SyftAttributeError

# Ports passed to `sy.orchestra.launch` for each domain node below.
# NOTE(review): presumably these must match the ports used in the setup
# notebook — confirm.
CANADA_DOMAIN_PORT = 9081
ITALY_DOMAIN_PORT = 9082

# Log in to the domain nodes as a data scientist

In [None]:
# Launch the domain nodes we set up in the previous notebook: one handle per
# domain, each identified by its own name and port.
canada_node = sy.orchestra.launch(
    name="canada-domain", port=CANADA_DOMAIN_PORT, dev_mode=True
)
italy_node = sy.orchestra.launch(
    name="italy-domain", port=ITALY_DOMAIN_PORT, dev_mode=True
)

In [None]:
# Log in to each domain with the same data scientist credentials; the
# resulting clients are used for every later interaction with that domain.
ds_canada_client = canada_node.login(email="sheldon@caltech.edu", password="changethis")
ds_italy_client = italy_node.login(email="sheldon@caltech.edu", password="changethis")

# Find datasets across multiple domains

In [None]:
# Take the first asset of the last dataset listed on each domain.
# NOTE(review): presumably this is the census dataset uploaded in the setup
# notebook — confirm that the last dataset is the intended one.
canada_census_data = ds_canada_client.datasets[-1].assets[0]
italy_census_data = ds_italy_client.datasets[-1].assets[0]

# Create and submit a distributed project

In [None]:
# Create the project object; code is attached and the project is sent to the
# domains in later cells.
new_project = sy.DistributedProject(
    name="Census Matching",
    description="Match census data between Canada and Italy",
)
# Bare expression so the notebook displays the project as the cell output
new_project

In [None]:
# Multi-party computation: count likely matches between the two census assets
@sy.syft_function_single_use(
    canada_census_data=canada_census_data,
    italy_census_data=italy_census_data,
)
def compute_census_matches(canada_census_data, italy_census_data):
    """Return how many candidate record pairs are classified as matches.

    Candidate pairs are built by blocking on ``given_name``, each pair is
    compared on six fields, and a pair counts as a match when the row sum of
    its comparison features is greater than 3.
    """
    # third party
    import recordlinkage

    # Blocking: only pair up records that share the same given name
    pair_indexer = recordlinkage.Index()
    pair_indexer.block("given_name")
    candidate_pairs = pair_indexer.index(canada_census_data, italy_census_data)

    # Field-by-field comparison, registered in a fixed order
    comparer = recordlinkage.Compare()
    comparer.exact("given_name", "given_name", label="given_name")
    comparer.string(
        "surname",
        "surname",
        method="jarowinkler",
        threshold=0.85,
        label="surname",
    )
    for column in ("date_of_birth", "suburb", "state"):
        comparer.exact(column, column, label=column)
    comparer.string("address_1", "address_1", threshold=0.85, label="address_1")

    comparison_table = comparer.compute(
        candidate_pairs, canada_census_data, italy_census_data
    )

    # Classification: keep pairs whose summed feature score exceeds 3
    pair_scores = comparison_table.sum(axis=1)
    matched_pairs = comparison_table[pair_scores > 3]
    return len(matched_pairs)

In [None]:
# Check result of execution on mock data: call the function directly with
# each asset's `.mock` before submitting it to the domains.
mock_result = compute_census_matches(
    canada_census_data=canada_census_data.mock,
    italy_census_data=italy_census_data.mock,
)
# Bare expression so the notebook displays the mock match count
mock_result

In [None]:
# Add code to the project: attach the census-matching function defined above
new_project.add_code(compute_census_matches)

In [None]:
# Send the project to all the domains for approval; the value returned by
# `send()` is what the following cells inspect.
project = new_project.send()
# Bare expression so the notebook displays the submitted project
project

In [None]:
# Sanity check: the first request targets the Canada domain and the second
# targets the Italy domain (matched by node UID against each client's id).
assert project.requests[0].node_uid == ds_canada_client.id
assert project.requests[1].node_uid == ds_italy_client.id

In [None]:
# Test: data scientist should not be able to approve the request.
# Calling `approve()` is expected to raise SyftAttributeError.
with pytest.raises(SyftAttributeError) as exc_info:
    project.requests[0].approve()
# Bare expression so the notebook displays the captured exception
exc_info.value

# Cleanup local domain servers

In [None]:
# Shut down each node, but only when its node type value is "python".
# NOTE(review): presumably other node types are managed outside this
# notebook and must be left running — confirm.
if canada_node.node_type.value == "python":
    canada_node.land()

if italy_node.node_type.value == "python":
    italy_node.land()