In [None]:
import syft as sy

# Part 1: Join the Duet Server the Data Owner connected to

In [None]:
# Join the Duet session opened by the Data Owner. The returned `duet` handle is how
# the rest of this notebook reaches the remote object store (`duet.store`) and the
# remote torch namespace (`duet.torch`).
# NOTE(review): loopback=True presumably targets a Data Owner running on the same
# machine/network — confirm against the syft documentation.
duet = sy.join_duet(loopback=True)

### <img src="https://github.com/OpenMined/design-assets/raw/master/logos/OM/mark-primary-light.png" alt="he-black-box" width="100"/> Checkpoint 0 : Now STOP and run the Data Owner notebook until Checkpoint 1.

# Part 2: Search for Available Data


In [None]:
# Show a tabular (`.pandas`) view of the objects the Data Owner has made available
# in the Duet store, so the Data Scientist can see what can be pointed to.
duet.store.pandas

In [None]:
# The Data Scientist wants to work with the iris dataset, which requires two
# pointers from the Data Owner's store: one to the data and one to the labels used
# as prediction targets. The entries are selected by their position in the store.
data_ptr = duet.store[0]
target_ptr = duet.store[1]

# data_ptr is a reference to the iris dataset remotely available on the Data Owner's server.
# target_ptr is a reference to the iris dataset LABELS remotely available on the
# Data Owner's server.
print(data_ptr)
print(target_ptr)

# Part 3: Train a Neural Network Classifier on the Iris dataset
Now the data scientist can perform machine learning on the data that is in the Data Owner's duet server, without the owner having to share his/her data.

### Basic analysis

First the data scientist needs to know some basic information about the dataset.
1. The length of the dataset
2. The input dimension
3. The output dimension

This information has to be explicitly shared by the Data Owner. Let's try to find it in the data description.

In [None]:
# Print the Data Owner's "Description" text for the first two store entries
# (presumably the same objects as duet.store[0] and duet.store[1] — verify against
# the store listing). The dataset length and input/output dimensions are looked up here.
print(duet.store.pandas["Description"][0])

print(duet.store.pandas["Description"][1])

### Train model

In [None]:
import torch

In [None]:
# Basic dataset facts needed to size the model (presumably read off the Data
# Owner's descriptions — verify against the printed text).
in_dim = 4  # input dimension: input size of the model's first Linear layer
out_dim = 3  # output dimension: output size of the model's final Linear layer
n_samples = 150  # length of the dataset; not referenced again in the visible cells

In [None]:
class SyNet(sy.Module):
    """Fully connected classifier built on a caller-supplied torch reference.

    The network stacks three linear layers, ``in_dim -> 20 -> 30 -> out_dim``,
    where ``in_dim`` and ``out_dim`` are module-level names of this notebook.
    Every torch call goes through ``self.torch_ref`` instead of a global
    ``torch`` import.
    """

    def __init__(self, torch_ref):
        """Create the three linear layers.

        Args:
            torch_ref: torch-like module used to build the layers; it is
                forwarded to ``sy.Module`` as ``torch_ref``.
        """
        super().__init__(torch_ref=torch_ref)
        nn = self.torch_ref.nn
        self.layer1 = nn.Linear(in_dim, 20)
        self.layer2 = nn.Linear(20, 30)
        self.out = nn.Linear(30, out_dim)

    def forward(self, x):
        """Return log-probabilities over the output classes for a batch ``x``.

        ReLU follows each of the first two layers; ``log_softmax`` is taken
        over dimension 1, so ``x`` must carry a leading batch dimension.
        """
        functional = self.torch_ref.nn.functional
        hidden = functional.relu(self.layer1(x))
        hidden = functional.relu(self.layer2(hidden))
        return functional.log_softmax(self.out(hidden), dim=1)


# Build the network with the locally imported torch; this instance has not been
# trained yet.
local_model = SyNet(torch)

In [None]:
# Send the locally built model through the Duet session; `remote_model` is the
# object the optimizer and the training loop below operate on.
remote_model = local_model.send(duet)

Let's create an alias for our partner's torch called `remote_torch`, so we can refer to the local torch as `torch` and to any operation we want to run remotely as `remote_torch`. Remember, the return values from `remote_torch` are Pointers, not the real objects. They mostly behave the same when used with other Pointers, but you can't mix them with local torch objects.

In [None]:
# Alias for the partner's torch namespace exposed by the Duet session; used
# wherever an operation should be expressed against the remote side.
remote_torch = duet.torch

In [None]:
# Create an Adam optimizer through remote_torch over the remote model's parameters,
# with a learning rate of 0.01.
params = remote_model.parameters()
optim = remote_torch.optim.Adam(params=params, lr=0.01)

In [None]:
def train(iterations, model, torch_ref, optim, data_ptr, target_ptr):
    """Run full-batch training steps and record the loss of every step.

    Each iteration clears the gradients, runs ``model`` on ``data_ptr``,
    computes the NLL loss against ``target_ptr.long()``, fetches the scalar
    loss through a ``.get`` request, then backpropagates and steps ``optim``.

    Args:
        iterations: number of training steps to run.
        model: callable applied to ``data_ptr`` to produce the predictions.
        torch_ref: torch-like namespace providing ``nn.functional.nll_loss``.
        optim: optimizer exposing ``zero_grad()`` and ``step()``.
        data_ptr: input data handed to ``model``.
        target_ptr: targets for the loss; must expose ``.long()``.

    Returns:
        list: the value returned by ``.get`` for each iteration, in order.
    """
    history = []

    for step in range(iterations):
        optim.zero_grad()

        predictions = model(data_ptr)
        loss = torch_ref.nn.functional.nll_loss(predictions, target_ptr.long())

        # Fetch the scalar loss value; the request is made with
        # request_block=True and a 5-second timeout.
        loss_value = loss.item().get(
            timeout_secs=5,
            request_block=True,
            reason="To evaluate training progress",
        )

        # Report progress on every tenth step only, but record every value.
        if step % 10 == 0:
            print("Epoch", step, "loss", loss_value)
        history.append(loss_value)

        loss.backward()
        optim.step()

    return history

In [None]:
# Run 100 training iterations against the remote model, remote torch and data
# pointers; `losses` receives one recorded loss value per iteration.
iteration = 100
losses = train(iteration, remote_model, remote_torch, optim, data_ptr, target_ptr)

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Plot the recorded training loss, one point per training iteration.
# The x-axis is derived from `losses` itself rather than from the `iteration`
# variable, so the cell stays valid even if `iteration` is edited without retraining.
plt.plot(range(len(losses)), losses)
plt.ylabel("Loss")
plt.xlabel("iteration")
plt.show()  # render the figure instead of echoing the Text repr of the last call

### Download model

In [None]:
def get_local_model(model):
    """Return a locally usable version of ``model``.

    A model whose ``is_local`` attribute is truthy is handed back unchanged.
    Otherwise it is requested through ``model.get`` with ``request_block=True``
    and a 5-second timeout.

    Args:
        model: object exposing ``is_local`` and, when not local, ``get``.

    Returns:
        ``model`` itself when it is already local, otherwise whatever
        ``model.get(...)`` returns.
    """
    if model.is_local:
        return model

    return model.get(
        reason="To run test and inference locally",
        request_block=True,
        timeout_secs=5,
    )


# Retrieve the trained model for local use. This rebinds `local_model`, replacing
# the untrained instance that was created before training.
local_model = get_local_model(remote_model)

### Test on local data

In [None]:
import torch
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score

In [None]:
# Base URL of the course-material folder that hosts the iris test CSV.
url = "https://raw.githubusercontent.com/znreza/Federated-Learning-Course-Material/main/Lecture%20Notebooks/Duet%20Iris%20Classifier/"

# Download the test split into a DataFrame and preview its first rows.
iris_test = pd.read_csv(f"{url}data/iris-test.csv")
iris_test.head()

In [None]:
# Features are every column except "species"; the "species" column is the label.
X_test = iris_test.loc[:, iris_test.columns != "species"]
y_test = iris_test["species"]

In [None]:
# Convert features and labels to torch tensors via NumPy arrays: a FloatTensor
# for the features and a LongTensor for the labels.
# Note: this rebinds X_test / y_test from pandas objects to tensors.
X_test = torch.FloatTensor(np.array(X_test))
y_test = torch.LongTensor(np.array(y_test))

In [None]:
# Classify the test samples one at a time with the downloaded model, printing each
# prediction next to its ground-truth label and collecting the predictions in `preds`.
preds = []
with torch.no_grad():  # inference only, so gradient tracking is switched off
    for i, sample in enumerate(X_test):
        # The model applies log_softmax over dim=1, so a single sample needs a
        # leading batch dimension before it is passed in.
        batch = sample.unsqueeze(0)
        y_hat = local_model(batch)
        pred = y_hat.argmax().item()
        print(f"Prediction: {pred} Ground Truth: {y_test[i]}")
        preds.append(pred)

In [None]:
# Compare the collected predictions with the true labels and report the score
# scaled by 100 (i.e. as a percentage).
acc = accuracy_score(y_test, preds)
print("Overall test accuracy", acc * 100)