In [None]:
# https://www.simonwenkel.com/2019/07/20/introduction-to-pysyft.html

# How to load a federated data set: https://github.com/OpenMined/PySyft/blob/syft_0.2.x/examples/tutorials/advanced/Federated%20Dataset.ipynb?short_path=dddb5e6

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from syft.workers.websocket_client import WebsocketClientWorker
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
import pickle
import os
import numpy as np
from torch.utils.data import TensorDataset, DataLoader
from torch.utils.data import Dataset


import syft as sy

In [15]:
class Arguments():
    def __init__(self):
        self.batch_size = 20
        self.test_batch_size = 1000
        self.epochs = 10
        self.lr = 0.01
        self.momentum = 0.5
        self.no_cuda = False
        self.seed = 1
        self.log_interval = 5
        self.save_model = False

args = Arguments()
use_cuda = not args.no_cuda and torch.cuda.is_available()

torch.manual_seed(args.seed)

device = torch.device("cuda" if use_cuda else "cpu")

kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

In [3]:
hook = sy.TorchHook(torch)

In [None]:
# Unused in final version, but this allows your workers to be virtual and on the same device (good for testing)
worker1 = sy.VirtualWorker(hook, id="worker1")
worker2 = sy.VirtualWorker(hook, id="worker2")
worker3 = sy.VirtualWorker(hook, id="worker3")
worker4 = sy.VirtualWorker(hook, id="worker4")

In [6]:
# Connecting to remote workers via ip address
kwargs_websocket = {"host": "localhost", "hook": hook}
# kwargs_websocket = {"host": "192.168.10.49", "hook": hook}
alice = WebsocketClientWorker(id='alice', port=8779, **kwargs_websocket)
bob = WebsocketClientWorker(id='bob', port=8778, **kwargs_websocket)

In [4]:
# Functions to manipulate input data
def split_array(a, chunk_size=80):
    d, N = a.shape
    num_splits = d // chunk_size
    b = a[:(num_splits*chunk_size),:]
    out = np.vsplit(b, num_splits)
    return out

def normalize_array(a):
    means = np.mean(a,axis=0)
    a = a - means
    stds = np.std(a, axis=0)
    a = a / stds
    return a

In [5]:
col_labels = ['X','Y','Z']
activity_labels = ['Climb_stairs', 'Pour_water', 'Drink_glass']

# Load Data
X = list()
Y = list()
for activity in activity_labels:
    for file in os.listdir('data/HMP_Dataset/'+activity):
        filepath = 'data/HMP_Dataset/'+activity+'/'+file
        arr = np.loadtxt(filepath,  delimiter=" ")
        norm_arr = normalize_array(arr)
        split_arr = split_array(norm_arr, chunk_size=80)
        X.extend(split_arr)
        Y.extend([activity]*len(split_arr))

In [7]:
# Preprocess/Manipulate data, splitting into train/test 
X_arr = np.array(X)
Y_arr = np.array(Y)

num_classes = len(activity_labels)
N, time_size, axes = X_arr.shape
X_arr = X_arr.reshape((N, time_size*axes))

# Encode labesl into integers
le = preprocessing.LabelEncoder()
Y_arr = le.fit_transform(Y_arr).reshape((len(Y_arr),1))
enc = OneHotEncoder(handle_unknown='ignore')
Y_arr = enc.fit(Y_arr).transform(Y_arr).toarray()

# Train-test Split
X_train, X_test, y_train, y_test = train_test_split(X_arr, Y_arr, test_size=0.33, random_state=42)

# Torch tensors
X_test_torch = torch.from_numpy(X_test).float()
y_test_torch = torch.from_numpy(y_test).float()
tensorDatasetTest = TensorDataset(X_test_torch, y_test_torch)
test_loader = DataLoader(tensorDatasetTest, batch_size=args.test_batch_size, shuffle=True)

In [8]:
# Assign each datapoint to a device in a federated manner
base=sy.BaseDataset(torch.from_numpy(X_train).float(),torch.from_numpy(y_train).float())
base_federated=base.federate((alice, bob))
federated_train_loader = sy.FederatedDataLoader( # <-- this is now a FederatedDataLoader 
                         base_federated,batch_size=args.batch_size, shuffle=True)

In [9]:
# Setup neural network layers
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(240, 100)
        self.fc2 = nn.Linear(100, 32)
        self.fc3 = nn.Linear(32, 24)
        self.fc4 = nn.Linear(24, num_classes)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        return F.log_softmax(x, dim=1)

In [10]:
# Train one epoch of the model
def train(args, model, device, federated_train_loader, optimizer, epoch):
    model.train()
#     For each batch of data, send to the device, have the device calculate a loss and update weights, retrieve model
    for batch_idx, (data, target) in enumerate(federated_train_loader): # <-- now it is a distributed dataset
        model.send(data.location) # <-- NEW: send the model to the right location
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.mse_loss(output, target)
        loss.backward()
        optimizer.step()
        model.get() # <-- NEW: get the model back
        if batch_idx % args.log_interval == 0:
            loss = loss.get()  # <-- NEW: get the loss back
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * args.batch_size, len(federated_train_loader) * args.batch_size,
                100. * batch_idx / len(federated_train_loader), loss.item()))

#  Test the model's accuracy and loss
def test(args, model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.mse_loss(output, target, reduction='sum').item() # sum up batch loss
            pred = output.argmax(1, keepdim=True) # get the index of the max log-probability 
            correct += pred.eq(target.argmax(1, keepdim=True)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [17]:
# Train the model for a number of epochs
# model = Net().to(device)
# optimizer = optim.SGD(model.parameters(), lr=args.lr)

for epoch in range(1, args.epochs + 1):
    train(args, model, device, federated_train_loader, optimizer, epoch)
    test(args, model, device, test_loader)


Test set: Average loss: 6.7547, Accuracy: 312/469 (67%)


Test set: Average loss: 6.7505, Accuracy: 318/469 (68%)


Test set: Average loss: 6.7460, Accuracy: 324/469 (69%)


Test set: Average loss: 6.7419, Accuracy: 329/469 (70%)


Test set: Average loss: 6.7377, Accuracy: 338/469 (72%)


Test set: Average loss: 6.7337, Accuracy: 346/469 (74%)


Test set: Average loss: 6.7297, Accuracy: 348/469 (74%)


Test set: Average loss: 6.7260, Accuracy: 358/469 (76%)


Test set: Average loss: 6.7224, Accuracy: 362/469 (77%)


Test set: Average loss: 6.7188, Accuracy: 368/469 (78%)



In [12]:
def calculate_accuracies(model, name):
    print("Accuracies for " + name)
    class_correct = list(0. for i in range(len(activity_labels)))
    class_total = list(0. for i in range(len(activity_labels)))
    with torch.no_grad():
        for data in test_loader:
            images, labels = data
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            _, actual = torch.max(labels, 1)
            for i, e in enumerate(predicted == actual):
                if e == True:
                    class_correct[actual[i]] += 1
                class_total[actual[i]] += 1

    for i in range(len(activity_labels)):
        print('Accuracy of %5s : %2d %%' % (
            activity_labels[i], 100 * class_correct[i] / class_total[i]))

In [18]:
calculate_accuracies(model, "general")

Accuracies for general
Accuracy of Climb_stairs : 81 %
Accuracy of Pour_water : 74 %
Accuracy of Drink_glass : 79 %
