In [None]:
import syft as sy
import recordlinkage
from recordlinkage.datasets import load_febrl4

# Create Nodes and connect to gateway

Create enclave node

In [None]:
# Local Python mode: launch the enclave node through the orchestra helper.
# NOTE(review): reset=True presumably discards any state left by a previous run — confirm.
enclave_node = sy.orchestra.launch(
    name="Enclave",
    node_type=sy.NodeType.ENCLAVE,
    local_db=True,
    dev_mode=True,
    reset=True,
)

In [None]:
from syft.abstract_node import NodeType
# Sanity check: the launched node reports the ENCLAVE type requested at launch.
assert enclave_node.python_node.node_type == NodeType.ENCLAVE

Create Canada node & Italy node

In [None]:
# One node per data owner. No node_type is passed here, so the library default applies.
ca_node = sy.orchestra.launch(
    name="Canada",
    local_db=True,
    reset=True,
    dev_mode=True,
)
it_node = sy.orchestra.launch(
    name="Italy",
    local_db=True,
    reset=True,
    dev_mode=True,
)

In [None]:
# Canada and Italy were launched without an explicit node_type; both must report DOMAIN.
assert ca_node.python_node.node_type == NodeType.DOMAIN
assert it_node.python_node.node_type == NodeType.DOMAIN

Create gateway node

In [None]:
# The gateway is the node that the enclave and both domains connect to further down.
gateway_node = sy.orchestra.launch(
    name="gateway",
    node_type=sy.NodeType.GATEWAY,
    local_db=True,
    reset=True,
    dev_mode=True,
)

Connect nodes to gateway

In [None]:
# Clients obtained via `.client` with no login call; the notebook treats them as guest clients.
enclave_guest_client = enclave_node.client
ca_guest_client = ca_node.client
it_guest_client = it_node.client

In [None]:
from syft.client.gateway_client import GatewayClient
from syft.client.enclave_client import EnclaveClient
from syft.client.domain_client import DomainClient

# Each node must hand back the client class that corresponds to its node type.
assert isinstance(enclave_guest_client ,EnclaveClient)
assert isinstance(ca_guest_client ,DomainClient)
assert isinstance(it_guest_client ,DomainClient)
assert isinstance(gateway_node.client, GatewayClient)

In [None]:
# Connect enclave to gateway
# SyftSuccess is imported here and reused by the assertions in later cells.
from syft.service.response import SyftSuccess
res = enclave_guest_client.connect_to_gateway(handle=gateway_node)
# Any response other than SyftSuccess stops the notebook here.
assert isinstance(res, SyftSuccess)
res

In [None]:
# Connect Canada to gateway
res = ca_guest_client.connect_to_gateway(handle=gateway_node)
# Any response other than SyftSuccess stops the notebook here.
assert isinstance(res, SyftSuccess)
res

In [None]:
# Connect Italy to gateway
res = it_guest_client.connect_to_gateway(handle=gateway_node)
# Any response other than SyftSuccess stops the notebook here.
assert isinstance(res, SyftSuccess)
res

# DOs

In [None]:
# Log in to each domain node as its data owner (DO).
# NOTE(review): credentials are hardcoded; they look like development defaults — confirm they
# are only ever valid on these locally launched nodes.
do_ca_client = ca_node.login(email="info@openmined.org", password="changethis")
do_it_client = it_node.login(email="info@openmined.org", password="changethis")

In [None]:
# DomainClient was already imported in an earlier cell; repeated so this cell is self-contained.
from syft.client.domain_client import DomainClient
# A logged-in data owner client is still expected to be a DomainClient.
assert isinstance(do_ca_client, DomainClient)
assert isinstance(do_it_client, DomainClient)

## Upload dataset

In [None]:
# Using public datasets from Freely Extensible Biomedical Record Linkage (Febrl).
# load_febrl4() yields two tables; the first is used as the "Canada" census and the
# second as the "Italy" census for the rest of the notebook.
canada_census_data, italy_census_data = load_febrl4()

In [None]:
# Pair each census table with the data owner client that uploads it and a display name.
uploads = zip(
    [canada_census_data, italy_census_data],
    [do_ca_client, do_it_client],
    ["Canada", "Italy"],
)
for dataset, client, country in uploads:
    # First 2500 rows become the private asset data; the remaining rows are exposed as the mock.
    private_data = dataset[:2500]
    mock_data = dataset[2500:]
    census_asset = sy.Asset(
        name="census_data",
        mock=mock_data,
        data=private_data,
        shape=private_data.shape,
        mock_is_real=True,
    )
    # NOTE: `dataset` is rebound here from the source table to the sy.Dataset wrapper.
    dataset = sy.Dataset(
        name=f"{country} - FEBrl Census Data",
        description="abc",
        asset_list=[census_asset],
    )
    client.upload_dataset(dataset)
    

In [None]:
# Exactly one dataset was uploaded per domain by the loop above.
assert len(do_ca_client.datasets.get_all()) == 1
assert len(do_it_client.datasets.get_all()) == 1

## Create accounts for DS

In [None]:
# Register the same data scientist (DS) account on both domains, using the data owner clients.
# The DS logs in with these credentials through the gateway later in the notebook.
for client in [do_ca_client, do_it_client]:
    res = client.register(
        name="Sheldon",
        email="sheldon@caltech.edu",
        password="changethis",
    )
    # Stop immediately if either registration did not succeed.
    assert isinstance(res, SyftSuccess)

# DS

## Login into gateway as guest

In [None]:
# The data scientist uses the gateway's `.client` without logging in (guest access, per the heading).
ds_gateway_client = gateway_node.client

In [None]:
# Explore the domains connected to the gateway (enclaves are read via `.enclaves` further down).
ds_gateway_client.domains

In [None]:
# Log into Canada as a proxy client obtained from the gateway.
# Index 0 is assumed to be Canada; the name assertion below fails fast if the order differs.
ds_ca_proxy_client = ds_gateway_client.domains[0]
ds_ca_proxy_client.login(email="sheldon@caltech.edu" , password="changethis")
assert ds_ca_proxy_client.name == "Canada"
# The proxy connection must target the same node the data owner client is attached to.
assert ds_ca_proxy_client.connection.proxy_target_uid == do_ca_client.id
assert isinstance(ds_ca_proxy_client, DomainClient)

In [None]:
# Log into Italy as a proxy client obtained from the gateway.
# Index 1 is assumed to be Italy; the name assertion below fails fast if the order differs.
ds_it_proxy_client = ds_gateway_client.domains[1]
ds_it_proxy_client.login(email="sheldon@caltech.edu" , password="changethis")
assert ds_it_proxy_client.name == "Italy"
# The proxy connection must target the same node the data owner client is attached to.
assert ds_it_proxy_client.connection.proxy_target_uid == do_it_client.id
assert isinstance(ds_it_proxy_client, DomainClient)

In [None]:
# Create an account on the enclave and log into it as a proxy client.
# register=True is passed because no DS account was registered on the enclave beforehand.
ds_enclave_proxy_client = ds_gateway_client.enclaves[0]
ds_enclave_proxy_client.login(email="sheldon@caltech.edu" , password="changethis",name="Sheldon", register=True)
assert ds_enclave_proxy_client.name == "Enclave"
# The proxy connection must target the enclave node itself.
assert ds_enclave_proxy_client.connection.proxy_target_uid == enclave_guest_client.id
assert isinstance(ds_enclave_proxy_client, EnclaveClient)

## Find datasets

In [None]:
# Take the first asset of the last dataset listed on each domain.
# NOTE: this rebinds canada_census_data / italy_census_data, which earlier held the tables
# returned by load_febrl4(); re-running the upload cell after this one would use the assets instead.
canada_census_data = ds_ca_proxy_client.datasets[-1].assets[0]
italy_census_data = ds_it_proxy_client.datasets[-1].assets[0]

## Create Request

In [None]:
@sy.syft_function_single_use(canada_census_data=canada_census_data, italy_census_data=italy_census_data)
def compute_census_matches(canada_census_data, italy_census_data):
    """Count record pairs across the two census tables that are classified as matches.

    Pipeline: block candidate pairs on "given_name", compare six fields per pair,
    then keep the pairs whose summed comparison scores exceed 3.

    Args:
        canada_census_data: first census table passed to the indexer and comparer.
        italy_census_data: second census table passed to the indexer and comparer.

    Returns:
        The number of candidate pairs classified as matches.
    """
    # Kept as a function-local import, exactly as in the original body.
    import recordlinkage

    # Index step: only pairs that share the same "given_name" become candidates.
    pair_indexer = recordlinkage.Index()
    pair_indexer.block("given_name")
    candidate_pairs = pair_indexer.index(canada_census_data, italy_census_data)

    # Comparison step: one labelled feature column per compared field.
    comparer = recordlinkage.Compare()
    comparer.exact("given_name", "given_name", label="given_name")
    comparer.string("surname", "surname", method="jarowinkler", threshold=0.85, label="surname")
    comparer.exact("date_of_birth", "date_of_birth", label="date_of_birth")
    comparer.exact("suburb", "suburb", label="suburb")
    comparer.exact("state", "state", label="state")
    # No `method` given for address_1, so the library's default string method is used.
    comparer.string("address_1", "address_1", threshold=0.85, label="address_1")
    scores = comparer.compute(candidate_pairs, canada_census_data, italy_census_data)

    # Classification step: a pair is a match when its summed feature scores exceed 3.
    score_per_pair = scores.sum(axis=1)
    matched_pairs = scores[score_per_pair > 3]

    return len(matched_pairs)
    

In [None]:
# Dry run: call the function locally on the assets' mock data before requesting
# execution on the private data.
mock_result = compute_census_matches(
    canada_census_data=canada_census_data.mock,
    italy_census_data=italy_census_data.mock,
)
mock_result

In [None]:
# Submit the function through the enclave proxy client as a code execution request.
req = ds_enclave_proxy_client.request_code_execution(compute_census_matches)
req

In [None]:
# Anything other than a Request object means the submission did not go through.
assert isinstance(req, sy.service.request.request.Request)

# DOs

## Approve

In [None]:
# Each data owner approves the last request in their own list (`requests[-1]`),
# presumably the one just submitted by the data scientist.
for client in [do_ca_client, do_it_client]:
    res = client.requests[-1].approve()
    assert isinstance(res, SyftSuccess)

# DS

## Get result

In [None]:
# Read the status of the last code object listed on the enclave.
status = ds_enclave_proxy_client.code.get_all()[-1].status
status

In [None]:
# Every entry in the status dict must be EXECUTE before the code is run.
# NOTE(review): presumably one entry per approving node — confirm.
for st in status.base_dict.values():
    assert st == sy.service.request.request.UserCodeStatus.EXECUTE

In [None]:
# Run the approved function through the enclave, passing the domain assets themselves
# (not their mocks) as inputs.
result_pointer = ds_enclave_proxy_client.code.compute_census_matches(
    canada_census_data=canada_census_data,
    italy_census_data=italy_census_data
)

In [None]:
# Display the object returned by the enclave call.
result_pointer

In [None]:
# Peek at the value carried by the result pointer (displayed, not asserted).
# The expected count is aligned with the `assert real_result == 813` check made on the
# fetched result at the end of the notebook; the previous literal (858) contradicted that
# assertion, so this cell would have silently displayed False.
# NOTE(review): assumes syft_action_data and .get() expose the same value — confirm.
result_pointer.syft_action_data == 813

In [None]:
# Fetch the computed value from the pointer.
real_result = result_pointer.get()
real_result

In [None]:
# Expected match count for the private slices uploaded earlier (first 2500 rows of each table).
assert real_result == 813