In [None]:
# third party
import numpy as np
import pandas as pd

# Fixed seed so that "Restart Kernel -> Run All" regenerates the same toy
# dataset; the unseeded global RNG produced different values on every run.
SEED = 42
rng = np.random.default_rng(SEED)

# Ten experience values drawn from 1..9 (the upper bound 10 is exclusive).
years = rng.integers(1, 10, size=10)

# Salary is a deterministic function of experience: 25,000 per year.
private_data = pd.DataFrame(
    {"Salary ($USD)": years * 25_000, "Experience (Years)": years}
)
private_data

In [None]:
# Stand-in frame with the same two columns as the private data: ten rows of a
# flat 100,000 salary, with the scalar 999 broadcast to every experience value.
mock_data = pd.DataFrame({"Salary ($USD)": [100_000 for _ in range(10)]}).assign(
    **{"Experience (Years)": 999}
)
mock_data

In [None]:
# syft absolute
import syft as sy

# Version specifier for the syft releases this notebook targets:
# at least 0.9.0 and below 0.9.1.
SYFT_VERSION = ">=0.9.0,<0.9.1"
# NOTE(review): presumably checks the installed syft against the specifier —
# confirm what happens on a mismatch.
sy.requires(SYFT_VERSION)

In [None]:
# Launch the "domain-high" node on port 8089; the private dataset is uploaded
# to this node further down.
# NOTE(review): reset=True presumably starts from a clean node state — confirm.
high_domain = sy.orchestra.launch(
    port="8089", name="domain-high", node_side_type="high", reset=True
)

In [None]:
# Log in to the high-side node (port 8089).
# NOTE(review): credentials are hardcoded; they look like demo defaults —
# confirm, and do not reuse outside a local demo.
high_client = sy.login(port="8089", email="info@openmined.org", password="changethis")

In [None]:
# Launch the "domain-low" node on port 8088; the mock-only dataset is uploaded
# here and the data scientist account is registered on it.
# NOTE(review): reset=True presumably starts from a clean node state — confirm.
low_domain = sy.orchestra.launch(
    port="8088", name="domain-low", node_side_type="low", reset=True
)

In [None]:
# Log in to the low-side node (port 8088) with the same credentials used for
# the high side.
# NOTE(review): hardcoded credentials — confirm these are demo defaults only.
low_client = sy.login(port="8088", email="info@openmined.org", password="changethis")

In [None]:
# Display the high-side client to confirm the login above.
high_client

In [None]:
# Descriptive metadata for the dataset that is uploaded to the high side.
dataset_metadata = {
    "name": "Population Income [2023-24]",
    "description": "This was the annual income of the residents of this country. Generated for educational purposes.",
    "citation": "Smith, J., Johnson, A., & Williams, E. (2023). Imaginary People Salary Dataset [Data set].",
    "url": "https://example.com/fake-salary-dataset",
}
dataset = sy.Dataset(**dataset_metadata)

In [None]:
# High-side asset: the real frame goes in `data`, and the mock slot is
# explicitly left empty.
private_data_asset = sy.Asset(
    name="Income dataset", data=private_data, mock=sy.ActionObject.empty()
)

In [None]:
# Attach the private asset to the high-side dataset.
dataset.add_asset(private_data_asset)

In [None]:
# Upload the dataset (carrying the private asset) through the high-side client.
high_client.upload_dataset(dataset)

In [None]:
# Display the datasets on the high-side node to check the upload.
high_client.datasets

In [None]:
# The low-side dataset reuses exactly the same metadata as the high-side one.
mock_dataset_metadata = {
    "name": "Population Income [2023-24]",
    "description": "This was the annual income of the residents of this country. Generated for educational purposes.",
    "citation": "Smith, J., Johnson, A., & Williams, E. (2023). Imaginary People Salary Dataset [Data set].",
    "url": "https://example.com/fake-salary-dataset",
}
mock_dataset = sy.Dataset(**mock_dataset_metadata)

In [None]:
# Low-side asset: both the `mock` and the `data` slots hold the mock frame, so
# this asset carries no private values.
mock_data_asset = sy.Asset(
    name="Income dataset",
    mock=mock_data,  # mock data
    data=mock_data,  # also mock data
)

In [None]:
# Attach the mock-only asset to the low-side dataset.
mock_dataset.add_asset(mock_data_asset)

In [None]:
# Upload the mock-only dataset through the low-side client.
low_client.upload_dataset(mock_dataset)

In [None]:
# Display the datasets on the low-side node to check the upload.
low_client.datasets

In [None]:
# Register a data-scientist account on the low-side node.

user = low_client.register(
    email="bob@caltech.edu",
    name="Scientist Bob",
    password="abc123",
    password_verify="abc123",
    institution="Caltech",
    website="caltech.edu",
)

# Display the node's users after the registration.
low_client.users

In [None]:
# Log in to the low-side node (port 8088) as the data scientist registered above.
scientist_domain = sy.login(email="bob@caltech.edu", password="abc123", port=8088)

In [None]:
# Display the datasets as seen through the data scientist's client.
scientist_domain.datasets

In [None]:
# Display the first dataset in that listing.
scientist_domain.datasets[0]

In [None]:
# Keep a handle on the first dataset's assets; later cells index into it.
assets = scientist_domain.datasets[0].assets
assets

In [None]:
# Mock side of the first asset; used below to prototype the analysis locally.
mock = assets[0].mock
mock

In [None]:
# Try to read the `data` side of the same asset.
# NOTE(review): what a non-admin user gets back here is not shown in this
# notebook — confirm the expected output.
assets[0].data

In [None]:
# third party
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Prototype the analysis locally on the mock frame before wrapping it in a
# Syft function.
data = mock

# Split the data into features (X) and target (y)
X = data[["Experience (Years)"]]
y = data["Salary ($USD)"]

# Hold out 20% of the rows for testing; the fixed random_state keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Initialize and train the linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions on the test data
predictions = model.predict(X_test)

# Print the predicted salaries and actual salaries.
# Adjacent f-strings are used instead of a backslash continuation, which would
# embed the next source line's indentation in the printed text.
for experience, predicted, actual in zip(
    X_test["Experience (Years)"], predictions, y_test
):
    print(
        f"Experience: {experience} years, "
        f"Predicted Salary: {predicted}, Actual Salary: {actual}"
    )

In [None]:
@sy.syft_function_single_use(data=assets[0])
def linear_regression(data):
    """Fit a linear regression of salary on experience and predict held-out rows.

    Args:
        data: Table with "Experience (Years)" and "Salary ($USD)" columns.

    Returns:
        The fitted model's predictions for the 20% test split.
    """
    # third party
    # NOTE(review): imports live inside the function, presumably because the
    # function is submitted and executed elsewhere — confirm before moving them.
    from sklearn.linear_model import LinearRegression
    from sklearn.model_selection import train_test_split

    feature_columns = ["Experience (Years)"]
    target_column = "Salary ($USD)"

    # Deterministic 80/20 split; the held-out targets are not needed here.
    train_features, test_features, train_target, _ = train_test_split(
        data[feature_columns], data[target_column], test_size=0.2, random_state=42
    )

    # Fit on the training rows, then predict the held-out rows.
    regressor = LinearRegression().fit(train_features, train_target)
    return regressor.predict(test_features)

In [None]:
# Dry-run the decorated function on the mock side of the asset.
pred = linear_regression(data=assets[0].mock)
pred

In [None]:
# NOTE(review): presumably unwraps the dry-run result into its underlying
# value — confirm.
pred.get()

Afterwards, the data scientist can submit their code to the data owner by creating a Syft project that embeds the code:

In [None]:
# Create a project whose only member is the data scientist's client.
new_project = sy.Project(
    name="Experience-Based Salary Prediction",
    description="Using a Linear Regression model to predict salaries based on years of experience.",
    members=[scientist_domain],
)
new_project

In [None]:
# Add a code request for `linear_regression` to the project, using the
# data scientist's client.
new_project.create_code_request(linear_regression, scientist_domain)

In [None]:
# NOTE(@leriomaggio): the `start()` call below is left disabled; the project
# is submitted with `send()` instead.
# new_project.start()
new_project.send()

In [None]:
# Back on the admin client for the low side: display the projects on the node.
low_client.projects

In [None]:
# Take the first project in the low-side listing for review.
submitted_project = low_client.projects[0]

In [None]:
# Pull the code object attached to the project's first request and display it.
code = submitted_project.requests[0].code
code

In [None]:
# Display the `run` attribute itself; it is not called in this cell.
code.run

<div class="alert alert-block alert-info">
While Syft makes sure that the function has not been tampered with, it does not validate the implementation itself. It is the data owner's responsibility to review the code and verify that it is safe to execute.
</div>


In [None]:
# Example: run the submitted function locally on the mock frame as a first check.
code.run(data=mock_data)

In [None]:
# Example: run the submitted function locally on the private frame and keep
# the output in `result`.
result = code.run(data=private_data)
result

In [None]:
# Display the project's first request.
submitted_project.requests[0]

In [None]:
# NOTE(@leriomaggio): open issues when responding to the request on the low side.

# Can't run this:
# submitted_project.requests[0].accept_by_depositing_result(result)

# Can't accept a low-side request:
# submitted_project.requests[0].approve_with_client(low_client)

# Can't substitute a NumPy array directly; it needs to be boxed as an ActionObject.
# Temporary workaround (unconfirmed):
# result_ao = sy.ActionObject.from_obj(result)
# result_ao
# submitted_project.requests[0].deposit_result(result_ao)

In [None]:
# Syncing from the low side to the high side works, but approving the synced
# request afterwards is still a problem (see the next cell).
widget = sy.sync(from_client=low_client, to_client=high_client)
widget

In [None]:
# Known issue: the high side still treats this as a low-side request and reports
# "SyftError: This request is a low-side request. Please sync your results to approve."
high_client.requests[-1].approve()

In [None]:
# Select the first asset of the first dataset, as seen by the data scientist.
asset = scientist_domain.datasets[0].assets[0]
asset

In [None]:
# Can't run this yet because the request has not been approved.
# Note: this rebinds `result`, which earlier held the output of
# `code.run(data=private_data)`.
result = scientist_domain.code.linear_regression(data=asset)
result.get()