In [None]:
# TODO: also move to dataset repo
# stdlib
import os

if not os.path.exists("ages_dataset.csv"):
    !curl -O https://openminedblob.blob.core.windows.net/csvs/ages_dataset.csv

if not os.path.exists("ages_mock_dataset.csv"):
    !curl -O https://openminedblob.blob.core.windows.net/csvs/ages_mock_dataset.csv

In [None]:
def helper_function():
    """Prepare the demo environment used by the rest of the notebook.

    Launches a test node, logs in with the default credentials, uploads the
    age dataset (real data together with its mock counterpart) and registers
    the guest user that the notebook later logs in as.
    """
    # third party
    import pandas as pd

    # syft absolute
    import syft as sy

    # start the test node; the returned handle is not used afterwards
    sy.orchestra.launch(name="test_domain", port=8085, dev_mode=False, reset=True)

    # default credentials (only for example)
    owner_client = sy.login(
        email="info@openmined.org", password="changethis", port=8085
    )

    # load real and mock data, dropping every row that has a missing value
    private_frame = pd.read_csv("ages_dataset.csv").dropna(how="any")
    mock_frame = pd.read_csv("ages_mock_dataset.csv").dropna(how="any")

    age_asset = sy.Asset(
        name="Age Data 2023",
        data=private_frame,
        mock=mock_frame,
    )
    age_dataset = sy.Dataset(
        name="Age Dataset",
        description="some description",
        asset_list=[age_asset],
    )

    # the same contributor is recorded on the dataset and on its first asset
    uploader = {"name": "Markus", "role": "Uploader", "email": "markus@gmail.com"}
    age_dataset.add_contributor(**uploader)
    age_dataset.assets[0].add_contributor(**uploader)

    # upload the dataset to the node
    owner_client.upload_dataset(age_dataset)

    # register a new user as a GUEST
    guest_password = "abc123"
    owner_client.register(
        name="Jane Doe",
        email="jane@caltech.edu",
        password=guest_password,
        password_verify=guest_password,
        institution="California Institute of Technology",
    )


def helper_function_approval():
    """Log in with the default credentials and approve the first request."""
    # syft absolute
    import syft as sy

    owner_client = sy.login(
        email="info@openmined.org", password="changethis", port=8085
    )
    first_request = owner_client.requests[0]
    first_request.approve()


def helper_function_denial():
    """Log in with the default credentials and deny the first request.

    NOTE(review): this targets ``requests[0]``, the same index that
    ``helper_function_approval`` acts on -- confirm that it reaches the
    intended request when both helpers are run in sequence.
    """
    # syft absolute
    import syft as sy

    owner_client = sy.login(
        email="info@openmined.org", password="changethis", port=8085
    )
    first_request = owner_client.requests[0]
    first_request.deny(reason="Please add appropriate noise level.")


# Run the setup: launch the test node, upload the dataset and register the guest user
helper_function()

In [None]:
# Login as guest / data scientist
# syft absolute
import syft as sy

# `port` is given as an int, consistent with the launch/login calls in the helper functions
client = sy.login(port=8085, email="jane@caltech.edu", password="abc123")

In [None]:
# Select mock data: the mock of the first asset in the first dataset

mock_df = client.datasets[0].assets[0].mock

In [None]:
# Preview the first rows of the mock data
mock_df.head()

In [None]:
# Mean "Age of death" over the mock rows whose "Manner of death" is "natural causes"
mock_df[mock_df["Manner of death"] == "natural causes"]["Age of death"].mean()

In [None]:
# first, extract the desired asset -- will be passed to input_policy
# note: despite its name, `mock` holds the asset itself here, not its `.mock` attribute

mock = client.datasets[0].assets[0]

In [None]:
# basic template for the query function
# the keyword given to the input policy (`private_dataset`) is also the name of
# the function's parameter


@sy.syft_function(
    input_policy=sy.ExactMatch(private_dataset=mock),
    output_policy=sy.SingleExecutionExactOutput(),
)
def example_function(private_dataset):
    """Template body: does nothing and returns None until customized."""
    # customize your query here
    pass

In [None]:
# same template with the single-use decorator; presumably a shorthand for
# spelling out the input and output policies explicitly -- confirm in the syft docs
@sy.syft_function_single_use(private_dataset=mock)
def example_function_shorter_annotation(private_dataset):
    """Template body: does nothing and returns None until customized."""
    # customize your query here
    pass

In [None]:
# the parameter name needs to match the argument name specified in the input policy (here: `private_dataset`)


mock = client.datasets[0].assets[0]


@sy.syft_function_single_use(private_dataset=mock)
def average_age_of_death_for_natural_causes_v0(private_dataset):
    """Return, as a float, the mean `Age of death` of the `natural causes` rows."""
    # keep only the rows whose manner of death is "natural causes"
    is_natural = private_dataset["Manner of death"] == "natural causes"
    natural_deaths = private_dataset[is_natural]

    mean_age = natural_deaths["Age of death"].mean()
    return float(mean_age)

In [None]:
# Call the function directly with the asset as its input and print what it returns
result = average_age_of_death_for_natural_causes_v0(private_dataset=mock)
print(f"Result from function call: {result}")

In [None]:
# The function call above and this plain computation on the mock data should agree

natural_causes_mock = mock_df[mock_df["Manner of death"] == "natural causes"]
result_local_computation = natural_causes_mock["Age of death"].mean()
print(f"Result from local computation: {result_local_computation}")

In [None]:
# Show the input policy type recorded on the decorated function
average_age_of_death_for_natural_causes_v0.input_policy_type

In [None]:
# Show the output policy type recorded on the decorated function
average_age_of_death_for_natural_causes_v0.output_policy_type

In [None]:
# Print the `code` attribute of the decorated function (presumably the captured source)
print(average_age_of_death_for_natural_causes_v0.code)

In [None]:
# OpenDP documentation for mean:
# https://docs.opendp.org/en/stable/user/transformations/aggregation-mean.html
# The code below is taken from the link above, and only the data and bounds are updated


@sy.syft_function_single_use(private_dataset=mock)
def noisy_average_age_of_death_for_natural_causes_v0(private_dataset):
    """Return a noised mean `Age of death` for the `natural causes` rows.

    Follows the OpenDP recipe referenced above this function: a Laplace-noised
    row count is released first and used as the resize target; the ages are
    then clamped to ``bounds``, resized, averaged and Laplace-noised again.

    Returns the value produced by the final OpenDP measurement.
    """
    # third party
    from opendp.mod import enable_features
    import opendp.prelude as dp

    enable_features("contrib")

    bounds = (0.0, 100.0)  # setting age bound
    df = private_dataset
    data = list(
        df[df["Manner of death"] == "natural causes"]["Age of death"].astype(float)
    )

    input_space = dp.vector_domain(dp.atom_domain(T=float)), dp.symmetric_distance()
    count_meas = input_space >> dp.t.then_count() >> dp.m.then_laplace(1.0)
    # The noised count can come out as zero or negative (e.g. when few or no
    # rows match the filter), which is not a usable size for `then_resize`.
    # Flooring it at 1 only post-processes an already-released value.
    dp_count = max(count_meas(data), 1)

    mean_meas = (
        input_space
        >> dp.t.then_clamp(bounds)
        # NOTE(review): `constant=5.0` is carried over from the OpenDP example;
        # confirm it is a sensible value for ages bounded by (0, 100)
        >> dp.t.then_resize(dp_count, constant=5.0)
        >> dp.t.then_mean()
        >> dp.m.then_laplace(1.0)
    )

    return mean_meas(data)

In [None]:
# The noisy result should be slightly different from the real one
# (the function adds Laplace noise to the mean before returning it)


noisy_result = noisy_average_age_of_death_for_natural_causes_v0(private_dataset=mock)
noisy_result

In [None]:
# Create a project; the code requests below are attached to it, and `client` is
# listed as its only member

new_project = sy.Project(
    name="Age of death audit",
    description="I am working on linking X with Y, and first, I will explore the age of death for various causes worldwide...",
    members=[client],
)
new_project

In [None]:
# Send the code to the domain. Use the function name to specify which function you want to send.
# Note: this is not the proper code submission; that happens when you submit the project. At this stage,
# the data manager will not be able to see your code request until you submit the project.

new_project.create_code_request(
    noisy_average_age_of_death_for_natural_causes_v0, client
)

In [None]:
# Add another code request, this time for the non-noisy average

new_project.create_code_request(average_age_of_death_for_natural_causes_v0, client)

In [None]:
# Let's check the code added to the domain (both functions requested above should be listed)

client.code

In [None]:
# Submit the project; this is the actual submission step that makes the code
# requests visible to the data manager
new_project.send()

In [None]:
# To check your result, you will need the asset again. This is because the code can be run on one or multiple
# assets, as specified in the input policy. Our examples will only accept one input, but it might be that some queries
# are supported on multiple inputs. For this, specify which result you are looking for.
# Note: at this point in the notebook the request has not been approved yet.

asset = client.datasets[0].assets[0]
result = client.code.noisy_average_age_of_death_for_natural_causes_v0(
    private_dataset=asset
)
result

In [None]:
# Data owner's side: log in with the default credentials and approve the first request
helper_function_approval()

In [None]:
# Now it should be approved, so the same call is repeated to fetch the result:

asset = client.datasets[0].assets[0]
result = client.code.noisy_average_age_of_death_for_natural_causes_v0(
    private_dataset=asset
)
result

<div class="alert alert-block alert-info">
<b>Info:</b> &#128070; Now this result is coming from the real data! </div>

In [None]:
# NOTE(review): the author was unsure why `result` had to be displayed a second
# time -- confirm whether this cell is still needed
result

In [None]:
# Data owner's side again: deny the first request, giving a reason
# NOTE(review): this acts on requests[0], the same index the approval helper used --
# confirm it targets the intended request
helper_function_denial()

In [None]:
# Let's check the other request (the non-noisy average)

asset = client.datasets[0].assets[0]
result = client.code.average_age_of_death_for_natural_causes_v0(private_dataset=asset)
result