# Data Owner

In [1]:
import os
import syft as sy
import tenseal as ts
import torch
import pandas as pd
import random
import numpy as np
import requests

import pytest
from time import time
import matplotlib.pyplot as plt
import sys
# Register stdout as an additional destination for syft's logger so that log
# messages show up in the notebook output.
sy.logger.add(sys.stdout)

### Starting Duet Data Owner instance

In [2]:
# Start the Data Owner side of the Duet session. The call is interactive: it
# prints a server ID to share with the partner and prompts for the partner's
# client ID before it returns (see the output below).
duet = sy.launch_duet()

🎤  🎸  ♪♪♪ Starting Duet ♫♫♫  🎻  🎹

♫♫♫ >[93m DISCLAIMER[0m: [1mDuet is an experimental feature currently in beta.
♫♫♫ > Use at your own risk.
[0m
[1m
    > ❤️ [91mLove[0m [92mDuet[0m? [93mPlease[0m [94mconsider[0m [95msupporting[0m [91mour[0m [93mcommunity![0m
    > https://github.com/sponsors/OpenMined[1m

♫♫♫ > Punching through firewall to OpenGrid Network Node at:
♫♫♫ > http://ec2-18-218-7-180.us-east-2.compute.amazonaws.com:5000
♫♫♫ >
♫♫♫ > ...waiting for response from OpenGrid Network... 
♫♫♫ > [92mDONE![0m
♫♫♫ > Duet Server ID: [1mefec618a93e5d4d007bb62f93814aa03[0m

♫♫♫ > [95mSTEP 1:[0m Send the following code to your Duet Partner!

import syft as sy
duet = sy.duet("[1mefec618a93e5d4d007bb62f93814aa03[0m")

♫♫♫ > [95mSTEP 2:[0m Ask your partner for their Client ID and enter it below!
♫♫♫ > Duet Partner's Client ID: 8d0966df25e61f47b3119c7102f6780f

♫♫♫ > Connecting...

♫♫♫ > [92mCONNECTED![0m

Encryption of the test-set took 0 secondsquests: 0   M

## Evaluating the Logistic Regression on Encrypted Data

### Prepare the data

We now prepare the training and test data. The dataset was downloaded from Kaggle.

This dataset provides patients' information along with a 10-year risk of future coronary heart disease (CHD) as a label. The goal is to build a model that can predict this 10-year CHD risk based on patients' information. You can read more about the dataset in the link provided.

In [16]:
from syft.util import get_root_data_path

def split_train_test(x, y, test_ratio=0.3):
    """Randomly partition the paired samples ``x`` / ``y`` into train and test sets.

    A list of positions ``0 .. len(x) - 1`` is shuffled with the global
    ``random`` module; the first ``int(len(x) * test_ratio)`` shuffled positions
    form the test set and the remaining ones form the training set. The same
    positions are used to index both ``x`` and ``y`` so pairs stay aligned.

    Args:
        x: Indexable collection of samples (indexed with a list of positions).
        y: Labels, indexed with the same positions as ``x``.
        test_ratio: Fraction of the samples assigned to the test set.

    Returns:
        The tuple ``(x_train, y_train, x_test, y_test)``.
    """
    n_samples = len(x)
    order = list(range(n_samples))
    random.shuffle(order)
    # boundary between the test portion (front) and the train portion (back)
    n_test = int(n_samples * test_ratio)
    held_out = order[:n_test]
    kept = order[n_test:]
    return x[kept], y[kept], x[held_out], y[held_out]
            
def heart_disease_data(path="./framingham.csv"):
    """Load the heart-disease CSV and return a class-balanced train/test split.

    Processing steps, in order:
      1. read the CSV at ``path``;
      2. drop every row that has a missing value;
      3. drop a fixed set of feature columns;
      4. down-sample each ``TenYearCHD`` class to the size of the smallest
         class (fixed ``random_state=73`` per class);
      5. split off the ``TenYearCHD`` column as the label;
      6. standardize the remaining columns (subtract mean, divide by std);
      7. hand the tensors to ``split_train_test``.

    Args:
        path: Location of the CSV file. Defaults to ``./framingham.csv`` in the
            current working directory.

    Returns:
        ``(x_train, y_train, x_test, y_test)`` as returned by
        ``split_train_test``; features and labels are float tensors and the
        labels carry a trailing dimension of size 1.
    """
    data = pd.read_csv(path)
    # drop rows with missing values
    data = data.dropna()
    # drop some features
    data = data.drop(columns=["education", "currentSmoker", "BPMeds", "diabetes", "diaBP", "BMI"])
    # balance data: sample the same number of rows from every class. Sampling
    # each group directly (instead of groupby.apply) keeps the label column in
    # the frame regardless of how the installed pandas treats grouping columns
    # inside apply, while selecting the same rows in the same order.
    grouped = data.groupby("TenYearCHD")
    rows_per_class = grouped.size().min()
    data = pd.concat(
        [group.sample(rows_per_class, random_state=73) for _, group in grouped],
        ignore_index=True,
    )
    # extract labels
    y = torch.tensor(data["TenYearCHD"].values).float().unsqueeze(1)
    # `axis` must be given by keyword in pandas 2.x; `columns=` works everywhere
    data = data.drop(columns="TenYearCHD")
    # standardize data
    data = (data - data.mean()) / data.std()
    x = torch.tensor(data.values).float()
    return split_train_test(x, y)


# Build the train/test tensors used by the rest of the notebook.
# NOTE(review): no random seed is set in the visible cells, so the shuffle in
# split_train_test presumably yields a different split on every run — confirm.
x_train, y_train, x_test, y_test = heart_disease_data()

### Making Training data Referenceable over Duet

Making the training data available.

In [4]:
# Send the training features and labels into the Duet session under the tags
# "x_train" and "y_train"; the returned pointers are kept for later reference.
# The request tables shown below list requests for objects with these tags.
x_train_ptr = x_train.send(duet, pointable=True, tags=["x_train"])
y_train_ptr = y_train.send(duet, pointable=True, tags=["y_train"])

### Look for the requests

In [6]:
# Display the pending access requests as a table (tags, reason, IDs, type).
duet.requests.pandas

### Approve the requests

In [5]:
# Accept both pending requests (x_train and y_train).
# NOTE(review): index 0 is used twice on purpose — presumably an accepted
# request leaves the queue, so the next pending one moves to position 0.
# Confirm against the Duet request API.
duet.requests[0].accept()
duet.requests[0].accept()

Unnamed: 0,Requested Object's tags,Reason,Request ID,Requested Object's ID,Requested Object's type
0,[x_train],I would like to get the training data,<UID: 90854230809541608a55bed379ec17f7>,<UID: befbda91a96b438b94903fe07bf33d2b>,<class 'torch.Tensor'>
1,[y_train],I would like to get the training labels,<UID: a6b07546455f4b3a978323addf9c47c1>,<UID: 820c2aad01bf4c1986e7ebf32bcbc3fa>,<class 'torch.Tensor'>


### Encrypt the data


In [8]:
# Create the TenSEAL CKKS context used to encrypt the test data.
# NOTE(review): this same context object is sent to the Duet partner in a later
# cell — confirm that the serialized form does not include the secret key.
context = ts.Context(
    ts.SCHEME_TYPE.CKKS,
    poly_modulus_degree=8192,
    coeff_mod_bit_sizes=[60, 40, 40, 60]
)
context.global_scale = 2**40
context.generate_galois_keys()
t_start = time()
# Keep only the first 10 test rows for encryption. y_test is NOT sliced here;
# the accuracy cell pairs results with labels via zip().
x_test = x_test[:10]
# Encrypt each row as its own CKKS vector and wrap the vectors in a syft List.
enc_x_test = sy.lib.python.List([ts.ckks_vector(context, x.tolist()) for x in x_test])
t_end = time()
# int() truncates, so a sub-second encryption is reported as "0 seconds".
print(f"Encryption of the test-set took {int(t_end - t_start)} seconds")

### Making the Encrypted Test Data Referenceable over Duet

In [9]:
# Send the context and the encrypted test rows into the Duet session under the
# tags "context" and "enc_x_test".
# NOTE(review): verify what key material travels with `context` when it is sent.
ctx_ptr = context.send(duet, pointable=True, tags=["context"])
enc_x_test_ptr = enc_x_test.send(duet, pointable=True, tags=["enc_x_test"])

### Look for the requests

In [11]:
# Display the pending requests as a table. Two are expected at this point:
# one for the context and one for the encrypted test data.
duet.requests.pandas

Unnamed: 0,Requested Object's tags,Reason,Request ID,Requested Object's ID,Requested Object's type
0,[context],I would like to get the context,<UID: 3616ad7c97bb4a9db5a26b9d2b7cdd96>,<UID: cd7208cb47b548e09bb2bd14ed81e454>,<class 'tenseal.enc_context.Context'>
1,[enc_x_test],I would like to get encrypted test set,<UID: d48eb42962924f7d8fc9c8dea6be560a>,<UID: c2f2b8e1f6d64229a193e53f19d921e0>,<class 'syft.lib.python.list.List'>


### Approve the requests

In [12]:
# Accept both pending requests (context and enc_x_test).
# NOTE(review): index 0 is used twice on purpose — presumably an accepted
# request leaves the queue, so the next pending one moves to position 0.
# Confirm against the Duet request API.
duet.requests[0].accept()
duet.requests[0].accept()

### Look at the Duet store for results

In [14]:
# Print the Duet store as a plain-text table; the "result_eval" entry looked up
# in the next cell should be listed here before continuing.
print(duet.store.pandas)

                                        ID           Tags Description  \
0  <UID: befbda91a96b438b94903fe07bf33d2b>      [x_train]               
1  <UID: 820c2aad01bf4c1986e7ebf32bcbc3fa>      [y_train]               
2  <UID: cd7208cb47b548e09bb2bd14ed81e454>      [context]               
3  <UID: c2f2b8e1f6d64229a193e53f19d921e0>   [enc_x_test]               
4  <UID: 42a367b3ba7441a3808c56ba6b982829>  [result_eval]               

                             object_type  
0                 <class 'torch.Tensor'>  
1                 <class 'torch.Tensor'>  
2  <class 'tenseal.enc_context.Context'>  
3    <class 'syft.lib.python.list.List'>  
4    <class 'syft.lib.python.list.List'>  


### Test the accuracy

In [15]:
# Fetch the evaluation results from the Duet store; delete_obj=False is passed
# so the stored object is not deleted by the fetch.
result_eval = duet.store["result_eval"].get(delete_obj=False)
correct = 0
evaluated = 0
# zip() stops at the shorter input, so only results that have a matching label
# are scored; `evaluated` counts exactly those pairs.
for actual, expected in zip(result_eval, y_test):
    # Link the local context before decrypting (presumably required so that
    # decrypt() has access to the owner's key — confirm with TenSEAL docs).
    actual.link_context(context)
    actual = torch.tensor(actual.decrypt())
    # Map the decrypted model output to a probability in (0, 1).
    actual = torch.sigmoid(actual)
    evaluated += 1

    # A prediction counts as correct when it is within 0.5 of the 0/1 label.
    if torch.abs(actual - expected) < 0.5:
        correct += 1

# Use the number of scored pairs as the denominator so the reported accuracy
# stays right even if fewer results than len(x_test) came back.
accuracy = correct / evaluated if evaluated else float("nan")
print(f"Evaluated test_set of {evaluated} entries. Accuracy: {correct}/{evaluated} = {accuracy}")

Evaluated test_set of 20 entries. Accuracy: 10/20 = 0.5
