# Federated Learning with Flower project

Tutorials:
    
    https://github.com/OpenMined/PySyft/blob/dev/examples/tutorials/advanced/Federated%20Dataset.ipynb

In [8]:
import torch as th
import os
import numpy as np
import torchvision
import syft as sy

W0802 17:26:18.142209  1324 secure_random.py:22] Falling back to insecure randomness since the required custom op could not be found for the installed version of TensorFlow (1.14.0). Fix this by compiling custom ops.


In [9]:
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
from torch import nn
import torch.nn.functional as F
from torch import optim
from collections import OrderedDict

%matplotlib inline

In [10]:
#creating a class with hyperparameters

class Arguments():
    def __init__(self):
        self.batch_size = 16
        self.test_batch_size = 16
        self.seed = 1


In [11]:
args = Arguments()

In [12]:
hook= sy.TorchHook(th)

In [13]:
#create workers
w1 = sy.VirtualWorker(hook, id="w1")
w2 = sy.VirtualWorker(hook, id="w2")
secure_worker3 = sy.VirtualWorker(hook, id="secure_worker3")

In [14]:
#share pointers between workers
w1.add_workers([w2, secure_worker3])
w2.add_workers([w1, secure_worker3])
secure_worker3.add_workers([w1, w2])

compute_nodes = [w1, w2]

W0802 17:26:18.856052  1324 base.py:624] Worker w2 already exists. Replacing old worker which could cause                     unexpected behavior
W0802 17:26:18.857048  1324 base.py:624] Worker secure_worker3 already exists. Replacing old worker which could cause                     unexpected behavior
W0802 17:26:18.858047  1324 base.py:624] Worker w1 already exists. Replacing old worker which could cause                     unexpected behavior
W0802 17:26:18.859071  1324 base.py:624] Worker secure_worker3 already exists. Replacing old worker which could cause                     unexpected behavior
W0802 17:26:18.860042  1324 base.py:624] Worker w1 already exists. Replacing old worker which could cause                     unexpected behavior
W0802 17:26:18.862036  1324 base.py:624] Worker w2 already exists. Replacing old worker which could cause                     unexpected behavior


In [91]:
data_dir = 'flower_data'
# number of subprocesses to use for data loading
#num_workers = 1
# how many samples per batch to load
#batch_size = 68
# percentage of training set to use as validation
#valid_size = 0.2

# TODO: Define transforms for the training data and testing data
train_transforms = transforms.Compose([transforms.RandomRotation(30),
                                       transforms.RandomResizedCrop(224),
                                       transforms.RandomHorizontalFlip(),
                                       transforms.ToTensor(),
                                       transforms.Normalize([0.485, 0.456, 0.406],
                                                            [0.229, 0.224, 0.225])])

test_transforms = transforms.Compose([transforms.Resize(256),
                                      transforms.CenterCrop(224),
                                      transforms.ToTensor(),
                                      transforms.Normalize([0.485, 0.456, 0.406],
                                                           [0.229, 0.224, 0.225])])


# Pass transforms in here, then run the next cell to see how the transforms look
train_data = datasets.ImageFolder(data_dir + "\\train\\", transform=train_transforms)
test_data = datasets.ImageFolder(data_dir + '\\valid', transform=test_transforms)


trainloader = th.utils.data.DataLoader(train_data, batch_size=16, shuffle=True)
testloader = th.utils.data.DataLoader(test_data, batch_size=16)

In [49]:
data_size = math.ceil(len(train_data) / len(compute_nodes))
print(len(train_data))

6552


In [108]:
import math
import logging
from torch.utils.data import Dataset

logger = logging.getLogger(__name__)

def dataset_federate(data_loader, workers):

    #Add a method to easily transform a torch.Dataset or a sy.BaseDataset
    #into a sy.FederatedDataset. The dataset given is split in len(workers)
    #part and sent to each workers
    logger.info("Scanning and sending data to {}...".format(", ".join([w.id for w in workers])))

    # take ceil to have exactly len(workers) sets after splitting
    data_size = math.ceil(len(train_data) / len(workers))
    print(len(train_data))

    # Fix for old versions of torchvision
#     if not hasattr(dataset, "data"):
#         if hasattr(dataset, "train_data"):
#             dataset.data = dataset.train_data
#         elif hasattr(dataset, "test_data"):
#             dataset.data = dataset.test_data
#         else:
#             raise AttributeError("Could not find inputs in dataset")
#     if not hasattr(dataset, "targets"):
#         if hasattr(dataset, "train_labels"):
#             dataset.targets = dataset.train_labels
#         elif hasattr(dataset, "test_labels"):
#             dataset.targets = dataset.test_labels
#         else:
#             raise AttributeError("Could not find targets in dataset")
    print(workers)
    dorkers=[0,0]
    train_distributed_dataset = []
    #data_loader = th.utils.data.DataLoader(dataset, batch_size=data_size, drop_last=True)
    for dataset_idx, (data, targets) in enumerate(data_loader):
        worker = workers[dataset_idx % len(workers)]
        #print("Sending data to worker %s", worker.id)
        dorkers[dataset_idx%2]+=1
        data=th.tensor(data)
        data = data.send(worker)
        targets=th.tensor(targets) 
        #print(targets)
        targets = targets.send(worker)
        #print("Sending data to worker", worker.id)

        train_distributed_dataset.append((data, targets))  # .send(worker)
        
    print(dorkers)
    #print(len(datasets))
    #logger.debug("Done!")
    return train_distributed_dataset
  


In [109]:
from torchvision import datasets
from syft.frameworks.torch.federated import FederatedDataset, FederatedDataLoader, BaseDataset
datasets.ImageFolder.federate = dataset_federate

dist_trainset = train_data.federate((compute_nodes))



6552
[<VirtualWorker id:w1 #tensors:14077>, <VirtualWorker id:w2 #tensors:14077>]
[3276, 3276]


In [110]:
dist_trainset

[((Wrapper)>[PointerTensor | me:97388867260 -> w1:28255832092],
  (Wrapper)>[PointerTensor | me:9532153264 -> w1:21930910001]),
 ((Wrapper)>[PointerTensor | me:96964918345 -> w2:39541707940],
  (Wrapper)>[PointerTensor | me:70279559699 -> w2:50881892256]),
 ((Wrapper)>[PointerTensor | me:52680074695 -> w1:3510851943],
  (Wrapper)>[PointerTensor | me:22307241957 -> w1:65699875092]),
 ((Wrapper)>[PointerTensor | me:55679361848 -> w2:28827405283],
  (Wrapper)>[PointerTensor | me:61894109213 -> w2:88005737872]),
 ((Wrapper)>[PointerTensor | me:68248133237 -> w1:82530351570],
  (Wrapper)>[PointerTensor | me:34335816292 -> w1:15484810407]),
 ((Wrapper)>[PointerTensor | me:1948016275 -> w2:11132503795],
  (Wrapper)>[PointerTensor | me:26166498566 -> w2:32662843731]),
 ((Wrapper)>[PointerTensor | me:8454915752 -> w1:76236272541],
  (Wrapper)>[PointerTensor | me:23261290546 -> w1:76500459164]),
 ((Wrapper)>[PointerTensor | me:91339034922 -> w2:51871826284],
  (Wrapper)>[PointerTensor | me:69917

In [None]:
len(dist_trainset)

0

In [70]:
federated_dataset=FederatedDataset(federated_trainset)

AssertionError: On each worker, the input and target must have the same number of rows.

In [None]:
bob_trainset = federated_trainset.datasets['w1']
alice_trainset = federated_trainset.datasets['w2']

In [32]:
train_idx

3276

In [33]:
train_idx = int(len(train_data)/2)


In [34]:
# Dataset
#!dir flower_data\train

# Sending toy datasets to virtual workers
w1_train_dataset = sy.BaseDataset(train_data[:train_idx]).send(w1)
w2_train_dataset = sy.BaseDataset(train_data[train_idx:]).send(w2)


ValueError: too many values to unpack (expected 2)

In [15]:
w1_train_data = train_data[:train_idx]
w1_test_data = train_data[train_idx:]

ValueError: too many values to unpack (expected 2)