In [None]:
import syft as sy
import recordlinkage
from recordlinkage.datasets import load_febrl4

# Create Nodes

Create the enclave node

In [None]:
# Local Python mode (no port argument).
enclave_node = sy.orchestra.launch(
    name="enclave node",
    node_type="enclave",
    reset=True,
)
# Local Python server mode (alternative): same call with port="auto".
# enclave_node = sy.orchestra.launch(name="enclave node", node_type="enclave", port="auto", reset=True)

In [None]:
from syft.abstract_node import NodeType
# Sanity check: the node launched with node_type="enclave" reports NodeType.ENCLAVE.
assert enclave_node.python_node.node_type == NodeType.ENCLAVE

Create the Canada and Italy domain nodes

In [None]:
# Launch one domain node per data owner, each with a local database that is
# reset on start. Uses the same `sy.orchestra` accessor as the enclave launch
# so that every node in this notebook is created through one entry point.
ca_node = sy.orchestra.launch(name="canada", local_db=True, reset=True)
it_node = sy.orchestra.launch(name="italy", local_db=True, reset=True)

In [None]:
# Both data-owner nodes must report themselves as domain nodes.
assert all(
    node.python_node.node_type == NodeType.DOMAIN
    for node in (ca_node, it_node)
)

# Data Owners (DOs)

In [None]:
# Log in to each domain node with the data-owner credentials used in this demo.
do_ca_client = ca_node.login(
    email="info@openmined.org",
    password="changethis",
)
do_it_client = it_node.login(
    email="info@openmined.org",
    password="changethis",
)

In [None]:
from syft.client.domain_client import DomainClient

# Logging in to a domain node should hand back a DomainClient for each owner.
assert all(
    isinstance(owner_client, DomainClient)
    for owner_client in (do_ca_client, do_it_client)
)

## Upload dataset

In [None]:
# Use the public Freely Extensible Biomedical Record Linkage (Febrl) dataset 4.
# load_febrl4() returns two objects; here the first stands in for the Canadian
# census and the second for the Italian census.
canada_census_data, italy_census_data = load_febrl4()

In [None]:
# For each country: split its census frame into a private part and a mock
# part, wrap both in a single-asset Syft dataset, and upload that dataset
# through the country's data-owner client.
for census_frame, client, country in zip(
    (canada_census_data, italy_census_data),
    (do_ca_client, do_it_client),
    ("Canada", "Italy"),
):
    # Rows before position 2500 become the private asset; the rest is the mock.
    private_data = census_frame[:2500]
    mock_data = census_frame[2500:]

    census_asset = sy.Asset(
        name="census_data",
        mock=mock_data,
        data=private_data,
        shape=private_data.shape,
        mock_is_real=True,
    )
    client.upload_dataset(
        sy.Dataset(
            name=f"{country} - FEBrl Census Data",
            description="abc",
            asset_list=[census_asset],
        )
    )
    

In [None]:
# Each domain should now hold exactly the one dataset uploaded above.
assert all(
    len(owner_client.datasets.get_all()) == 1
    for owner_client in (do_ca_client, do_it_client)
)

## Create accounts for the data scientist (DS)

In [None]:
# Register the same data-scientist account on both domains so that one set of
# credentials works everywhere later in the notebook.
for client in (do_ca_client, do_it_client):
    res = client.register(name="Sheldon", email="sheldon@caltech.edu", password="changethis")
    assert isinstance(res, sy.service.response.SyftSuccess)

# Data Scientist (DS)

## Login

In [None]:
# Log in to the enclave as the data scientist; register=True is passed so the
# account is registered as part of this call.
ds_client = enclave_node.login(
    name="Sheldon",
    email="sheldon@caltech.edu",
    password="changethis",
    register=True,
)

In [None]:
from syft.client.enclave_client import EnclaveClient
# Logging in to the enclave node should return an EnclaveClient.
assert isinstance(ds_client, EnclaveClient)

In [None]:
# Data-scientist session on the Italy domain (account registered earlier by the data owner).
ds_client_it = it_node.login(email="sheldon@caltech.edu", password="changethis")

In [None]:
# Data-scientist session on the Canada domain (account registered earlier by the data owner).
ds_client_ca = ca_node.login(email="sheldon@caltech.edu", password="changethis")

In [None]:
# Both data-scientist sessions on the domains should be DomainClient instances.
assert all(
    isinstance(scientist_client, DomainClient)
    for scientist_client in (ds_client_it, ds_client_ca)
)

## Find datasets

In [None]:
# Take the first asset of the last dataset listed on each domain.
# NOTE: this rebinds canada_census_data / italy_census_data, which earlier held
# the objects returned by load_febrl4(); from here on they refer to the assets
# obtained through the data-scientist clients.
canada_census_data = ds_client_ca.datasets[-1].assets[0]
italy_census_data = ds_client_it.datasets[-1].assets[0]

## Create Request

In [None]:
@sy.syft_function_single_use(canada_census_data=canada_census_data, italy_census_data=italy_census_data)
def compute_census_matches(canada_census_data, italy_census_data):
    """Count likely record matches between the two census tables.

    Candidate pairs are limited to rows that share the same ``given_name``.
    Each pair is then scored on six field comparisons (given name, surname,
    date of birth, suburb, state, address line 1).

    Returns:
        The number of candidate pairs whose comparison scores sum to more
        than 3.
    """
    # Imported inside the function body, as in the original cell.
    import recordlinkage

    # Index step: block on given_name to build the candidate pairs.
    blocker = recordlinkage.Index()
    blocker.block("given_name")
    candidate_pairs = blocker.index(canada_census_data, italy_census_data)

    # Comparison step: one feature column per compared field.
    comparer = recordlinkage.Compare()
    comparer.exact("given_name", "given_name", label="given_name")
    comparer.string("surname", "surname", method="jarowinkler", threshold=0.85, label="surname")
    for field in ("date_of_birth", "suburb", "state"):
        comparer.exact(field, field, label=field)
    comparer.string("address_1", "address_1", threshold=0.85, label="address_1")

    scores = comparer.compute(candidate_pairs, canada_census_data, italy_census_data)

    # Classification step: keep pairs whose summed score exceeds 3.
    is_match = scores.sum(axis=1) > 3
    return len(scores[is_match])
    

In [None]:
# Dry run: call the function on the mock side of each asset and display the result.
mock_result = compute_census_matches(
    canada_census_data=canada_census_data.mock,
    italy_census_data=italy_census_data.mock,
)
mock_result

In [None]:
# Submit the function for execution through the enclave client and display the request.
req = ds_client.request_code_execution(compute_census_matches)
req

In [None]:
# The submission should come back as a Request object.
assert isinstance(req, sy.service.request.request.Request)

# DOs

## Approve

In [None]:
# Each data owner approves the last request listed on their own domain.
for client in (do_ca_client, do_it_client):
    latest_request = client.requests[-1]
    res = latest_request.approve()
    assert isinstance(res, sy.service.response.SyftSuccess)

# DS

## Get result

In [None]:
# Status of the last code object returned by get_all() on the enclave client.
status = ds_client.code.get_all()[-1].status
status

In [None]:
# Every entry in the status mapping must be EXECUTE before the code is run.
assert all(
    entry == sy.service.request.request.UserCodeStatus.EXECUTE
    for entry in status.base_dict.values()
)

In [None]:
# Call the submitted function through the enclave client, passing the assets
# found on each domain (not their mocks) as inputs.
result_pointer = ds_client.code.compute_census_matches(
    canada_census_data=canada_census_data,
    italy_census_data=italy_census_data
)

In [None]:
# Display the object returned by the enclave call.
result_pointer

In [None]:
# Compare the pointer's underlying value with the expected match count.
# 813 is the value asserted for the retrieved result in the final cell of this
# notebook; both checks describe the same computation, so they must agree.
result_pointer.syft_action_data == 813

In [None]:
# Retrieve the result from the pointer via .get() and display it.
real_result = result_pointer.get()
real_result

In [None]:
# Expected match count for the private data (first 2500 rows of each table, per the upload cell).
assert real_result == 813