In [2]:
# from Jupyter / Python
import syft as sy
# Check that the installed syft release satisfies the version range this notebook was written for.
sy.requires(">=0.8.1,<0.8.2")
# Start a domain server named "my-domain" on port 8080 and keep the handle used for login below.
# NOTE(review): dev_mode=True / reset=True presumably mean "development mode, start from
# fresh state" — confirm against the syft orchestra documentation.
node = sy.orchestra.launch(name="my-domain", port=8080, dev_mode=True, reset=True)



✅ The installed version of syft==0.8.1 matches the requirement >=0.8.1 and the requirement <0.8.2
Starting my-domain server on 0.0.0.0:8080

Waiting for server to start

INFO:     Started server process [2857]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8080 (Press CTRL+C to quit)


.INFO:     127.0.0.1:53942 - "GET /api/v2/metadata HTTP/1.1" 200 OK
. Done.
INFO:     127.0.0.1:53946 - "GET /api/v2/metadata HTTP/1.1" 200 OK
INFO:     127.0.0.1:53946 - "POST /api/v2/login HTTP/1.1" 200 OK
INFO:     127.0.0.1:53946 - "GET /api/v2/api?verify_key=5dbabaa72580ce3689cc91171841bfe62fe836425c1718f8cdac96d50212b61f HTTP/1.1" 200 OK


In [3]:
# Log into the node launched above; the returned client is what later cells use to talk to it.
# NOTE(review): the e-mail and password are hardcoded — presumably the default development
# admin account; confirm, and read them from the environment for anything beyond a local node.
domain_client = node.login(email="info@openmined.org", password="changethis")


Logged into my-domain as <info@openmined.org>


In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical, plot_model


# Load the pickled event log.
# NOTE(review): unpickling can execute arbitrary code — only load files from a trusted source.
db = pd.read_pickle("../datasets/sample.pkl")

# Mouse events: keep rows whose type is a mouse action, then drop the columns not used for them.
mouse_data = (
    db.loc[db['type'].isin(['mousemove', 'mousedown', 'mouseup'])]
    .drop(columns=['ID', 'INVALID', 'type', 'value', 'mu'])
)

# Keyboard events: keep key up/down rows, then drop the pointer/screen related columns.
keyboard_data = (
    db.loc[db['type'].isin(['keyup', 'keydown'])]
    .drop(columns=['ID', 'INVALID', 'X', 'Y', 'resolutionX', 'resolutionY', 'mu'])
)

# Distinct values observed in the keyboard events' 'value' column.
keystrokes = keyboard_data['value'].unique()
def load_mouse_data():
    """Build per-user train/test splits from the module-level ``mouse_data`` frame.

    For every distinct value of ``mouse_data['user']`` the rows of that user are
    split 80/20 with ``train_test_split(..., test_size=0.2, random_state=42)``.
    Features are the columns ``O, C, E, A, N``; the label is the ``user`` column,
    one-hot encoded with ``to_categorical``.

    Returns:
        tuple: ``(X_train, X_test, y_train, y_test)``, all cast to ``float32``.
        ``X_*`` are DataFrames with the five feature columns; ``y_*`` are the
        one-hot arrays returned by ``to_categorical``, both with the same width.

    Raises:
        ValueError: if ``mouse_data`` contains no users.
    """
    feature_cols = ['O', 'C', 'E', 'A', 'N']

    # Collect the per-user pieces and concatenate once at the end; growing a
    # frame with pd.concat inside the loop copies all previous rows each time.
    X_train_parts, X_test_parts = [], []
    y_train_parts, y_test_parts = [], []

    # sorted() makes the row order reproducible; iterating a bare set does not
    # guarantee any particular order.
    for user in sorted(set(mouse_data['user'])):
        user_rows = mouse_data[mouse_data['user'] == user]
        X_train_user, X_test_user, y_train_user, y_test_user = train_test_split(
            user_rows[feature_cols], user_rows['user'], test_size=0.2, random_state=42
        )
        X_train_parts.append(X_train_user)
        X_test_parts.append(X_test_user)
        y_train_parts.append(y_train_user)
        y_test_parts.append(y_test_user)

    if not X_train_parts:
        raise ValueError("mouse_data contains no users to split")

    X_train = pd.concat(X_train_parts)
    X_test = pd.concat(X_test_parts)
    y_train_labels = pd.concat(y_train_parts)
    y_test_labels = pd.concat(y_test_parts)

    # Use one shared class count so the train and test one-hot matrices always
    # have the same number of columns.
    num_classes = int(max(y_train_labels.max(), y_test_labels.max())) + 1
    y_train = to_categorical(y_train_labels.to_numpy(), num_classes=num_classes)
    y_test = to_categorical(y_test_labels.to_numpy(), num_classes=num_classes)

    return (
        X_train.astype('float32'),
        X_test.astype('float32'),
        y_train.astype('float32'),
        y_test.astype('float32'),
    )



In [None]:
import os 
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import tensorflow as tf
import tensorflow_privacy
from tensorflow_privacy.privacy.analysis import compute_dp_sgd_privacy_lib
import pandas as pd
from dataloader import load_mouse_data
tf.get_logger().setLevel('ERROR')

# Load the training and validation data
X_train, X_test, y_train, y_test = load_mouse_data()
train_dataset = tf.data.Dataset.from_tensor_slices(X_train)
test_dataset = tf.data.Dataset.from_tensor_slices(X_test)
train_labels_dataset = tf.data.Dataset.from_tensor_slices(y_train)
test_labels_dataset = tf.data.Dataset.from_tensor_slices(y_test)

# --- Hyper-parameters -------------------------------------------------------
dpsgd = True
# Derive the layer sizes from the data so the model always matches what was
# loaded (previously hard-coded as 5 features / 40 classes).
input_dim = X_train.shape[1]
num_classes = y_train.shape[1]
epochs = 10
batch_size = 64
training_size = 0.8
training_length = len(X_train)
testing_length = len(X_test)
l2_norm_clip = 1.0
noise_multiplier = 1.1
# NOTE(review): num_microbatches is defined but never passed to the optimizer,
# which matches used_microbatching=False in the privacy statement below.
num_microbatches = 128
learning_rate = 0.001

combined_train_dataset = tf.data.Dataset.zip((train_dataset, train_labels_dataset)).batch(batch_size)
combined_test_dataset = tf.data.Dataset.zip((test_dataset, test_labels_dataset)).batch(batch_size)

# --- Model ------------------------------------------------------------------
model = tf.keras.Sequential([
    tf.keras.layers.Dense(32, activation='relu', input_shape=(input_dim,)),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(num_classes, activation='softmax')
])

# Per-example (unreduced) losses, as in the original cell.
# A separate name keeps the loss object from being clobbered by the scalar
# `loss` returned from model.evaluate() further down.
loss_fn = tf.keras.losses.CategoricalCrossentropy(reduction=tf.losses.Reduction.NONE)
if dpsgd:
    optimizer = tensorflow_privacy.DPKerasAdamOptimizer(
        l2_norm_clip=l2_norm_clip,
        noise_multiplier=noise_multiplier,
        learning_rate=learning_rate,
    )
else:
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

# --- Train / evaluate -------------------------------------------------------
model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])
print(f"Train on {training_length} samples, validate on {testing_length} samples")
model.fit(X_train, y_train,
          epochs=epochs,
          validation_data=(X_test, y_test),
          batch_size=batch_size)

loss, accuracy = model.evaluate(X_test, y_test)
print("Loss:", loss)
print("Accuracy:", accuracy)

# --- Privacy accounting -----------------------------------------------------
if dpsgd:
    # The statement must describe the mechanism that was actually trained, so
    # it uses the same noise_multiplier as the optimizer (was hard-coded 0.8).
    privacy_report = compute_dp_sgd_privacy_lib.compute_dp_sgd_privacy_statement(
        number_of_examples=training_length,
        batch_size=batch_size,
        noise_multiplier=noise_multiplier,
        num_epochs=epochs,
        delta=1e-5,
        used_microbatching=False,
    )
else:
    # Without the DP optimizer no privacy guarantee applies; do not report one.
    privacy_report = "DP-SGD disabled: no differential-privacy guarantee applies."
print(privacy_report)

In [None]:
# Wrap the four local arrays/frames as syft ActionObjects.
# NOTE: this rebinds X_train / y_train / X_test / y_test to the wrappers, so
# re-running the cell would wrap the already-wrapped objects again.
X_train, y_train, X_test, y_test = (
    sy.ActionObject.from_obj(local_obj)
    for local_obj in (X_train, y_train, X_test, y_test)
)

# Store each wrapper on the domain through the action service and keep what
# the service returns for later reference.
X_train_domain_obj, y_train_domain_obj, X_test_domain_obj, y_test_domain_obj = (
    domain_client.api.services.action.set(action_obj)
    for action_obj in (X_train, y_train, X_test, y_test)
)


In [None]:
@sy.syft_function(
    input_policy=sy.ExactMatch(
        X_train=X_train_domain_obj.id,
        y_train=y_train_domain_obj.id,
        X_test=X_test_domain_obj.id,
        y_test=y_test_domain_obj.id,
    ),
    output_policy=sy.SingleExecutionExactOutput(),
)
def train(X_train, y_train, X_test, y_test):
    """Train a small dense classifier on the supplied data and evaluate it.

    The network is Dense(64, relu) -> Dense(32, relu) -> output layer, trained
    for 10 epochs with Adam (lr=0.001) and batch size 32.

    The output layer follows the label layout:
    - 2-D labels with more than one column (one-hot) get a softmax layer with
      one unit per column and categorical cross-entropy;
    - anything else keeps the single sigmoid unit with binary cross-entropy.

    Args:
        X_train: training features, 2-D (samples x features).
        y_train: training labels, one-hot matrix or binary vector.
        X_test: evaluation features.
        y_test: evaluation labels, same layout as ``y_train``.

    Returns:
        tuple: ``(loss, accuracy)`` from ``model.evaluate(X_test, y_test)``.
    """
    # Imports stay inside the function body as in the original; presumably
    # required because the function is executed on the node — confirm.
    from keras.models import Sequential
    from keras.layers import Dense
    from keras.optimizers import Adam

    # Use the data passed in by the input policy. The original replaced the
    # arguments with load_mouse_data(), which discards the approved inputs and
    # is not a name defined inside this function.
    input_shape = (X_train.shape[1],)

    # Pick the output head from the label layout (see docstring).
    label_shape = getattr(y_train, "shape", ())
    if len(label_shape) > 1 and label_shape[1] > 1:
        output_units, output_activation = label_shape[1], 'softmax'
        loss_name = 'categorical_crossentropy'
    else:
        output_units, output_activation = 1, 'sigmoid'
        loss_name = 'binary_crossentropy'

    # Create the model
    model = Sequential()
    model.add(Dense(64, activation='relu', input_shape=input_shape))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(output_units, activation=output_activation))

    # Compile the model
    optimizer = Adam(learning_rate=0.001)
    model.compile(optimizer=optimizer, loss=loss_name, metrics=['accuracy'])

    # Train the model
    model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

    # Evaluate the model
    loss, accuracy = model.evaluate(X_test, y_test)
    return loss, accuracy

In [None]:
# Submit the `train` function to the domain as a code-execution request.
request = domain_client.code.request_code_execution(train)
# Bare trailing expression: display the returned request object as the cell output.
request

In [None]:
# Approve the code-execution request submitted in the previous cell.
request.approve()


In [None]:
# Run the approved function on the domain. The submitted function is named
# `train` (there is no `train_mlp` in this notebook), and the arguments are the
# ids of the uploaded objects, i.e. the same ids the input policy was built from.
result_ptr = domain_client.code.train(
    X_train=X_train_domain_obj.id,
    y_train=y_train_domain_obj.id,
    X_test=X_test_domain_obj.id,
    y_test=y_test_domain_obj.id,
)

In [None]:
# Retrieve the value behind the result pointer and display it
# (presumably the (loss, accuracy) pair returned by `train` — confirm).
result_ptr.get()