<a href="https://colab.research.google.com/github/Berenice2018/DeepLearning/blob/master/PySyft_Simple_Federated_Learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Imports, setup


In [1]:
!pip install syft



In [2]:
import time
import datetime
import logging
import math

import numpy as np # linear algebra
#import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torchvision import datasets, models, transforms
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data import Dataset

import syft as sy

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os

# Logger named after the current module (the notebook's __main__).
logger = logging.getLogger(__name__)

# Report whether PyTorch can see a CUDA device.
cuda_visible = torch.cuda.is_available()
print(cuda_visible)

W0722 22:33:18.348394 140407260469120 secure_random.py:26] Falling back to insecure randomness since the required custom op could not be found for the installed version of TensorFlow. Fix this by compiling custom ops. Missing file was '/usr/local/lib/python3.6/dist-packages/tf_encrypted/operations/secure_random/secure_random_module_tf_1.14.0.so'
W0722 22:33:18.369588 140407260469120 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/tf_encrypted/session.py:26: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.



True


In [3]:
# Link Google Drive to this notebook so the flower data (and any saved model) lives on Drive
from google.colab import drive
drive.mount('/content/gdrive')


# Once the mount above has succeeded, Drive files are available
# under "/content/gdrive/My Drive" (the mount point passed to drive.mount).
!ls "/content/gdrive/My Drive/Colab Notebooks/flower_data/"

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
picco_test  test  train  train_ofgdrive  valid	valid_ofgdrive


In [0]:
# Root folder of the flower images on the mounted Drive (note the trailing slash)
data_dir = '/content/gdrive/My Drive/Colab Notebooks/flower_data/'

# One sub-folder per split; because data_dir already ends with a separator,
# os.path.join produces exactly data_dir + '<split>'.
train_dir, valid_dir, test_dir = (
    os.path.join(data_dir, split_name) for split_name in ('train', 'valid', 'test')
)

### Architecture and helpers

In [0]:
# Build the train/validation/test datasets sized for the selected pre-trained model
def create_loaders(base, final = False):
    """Create the ImageFolder datasets for training, validation and testing.

    Despite the name, this returns datasets, not DataLoaders; the caller is
    responsible for wrapping them in a loader.

    Args:
        base: Name of the pre-trained architecture. 'Inception' selects
            299x299 inputs; any other value selects 224x224.
        final: Unused; kept so existing callers keep working.

    Returns:
        Tuple (trainset, validationset, testset) of ImageFolder datasets read
        from the module-level train_dir, valid_dir and test_dir.
    """
    print('returning datasets')
    # ResNet, DenseNet expect 224, Inception expects 299
    img_size = 299 if base == 'Inception' else 224

    # Per-channel mean/std shared by both pipelines
    # (the ImageNet statistics used by torchvision's pre-trained models).
    normalize = transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))

    # Random augmentation: applied to the training split only.
    transforms_train = transforms.Compose([
        transforms.RandomRotation(30),
        transforms.RandomResizedCrop(img_size),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.ToTensor(),
        normalize,
    ])

    # Deterministic preprocessing for evaluation, so metrics are repeatable.
    transforms_eval = transforms.Compose([
        transforms.Resize(img_size + 1),
        transforms.CenterCrop(img_size),
        transforms.ToTensor(),
        normalize,
    ])

    # Load the datasets with ImageFolder
    trainset = datasets.ImageFolder(train_dir, transform=transforms_train)
    # Validation must not be randomly augmented, otherwise its loss is noisy.
    validationset = datasets.ImageFolder(valid_dir, transform=transforms_eval)
    # The test split has its own folder; it was previously read from valid_dir.
    testset = datasets.ImageFolder(test_dir, transform=transforms_eval)

    return trainset, validationset, testset

In [0]:
# Spread a torch Dataset over PySyft workers and return a sy.FederatedDataLoader over it.
def dataset_federate(dataset, workers, batch_size=32):
    """Send a dataset to the given workers batch by batch and wrap it in a federated loader.

    The dataset is iterated once in its own order (no shuffling). Batch i is
    sent to workers[i % len(workers)], so the batches are dealt round-robin.
    Every batch is materialised while this runs, so the whole dataset passes
    through memory.

    Args:
        dataset: A dataset accepted by torch.utils.data.DataLoader that yields
            (data, targets) pairs.
        workers: Non-empty sequence of PySyft workers that receive the batches.
        batch_size: Size of the chunks sent to the workers and of the batches
            produced by the returned loader. Defaults to 32, the previously
            hard-coded value.

    Returns:
        A sy.FederatedDataLoader (shuffle=False, drop_last=False) over the
        remote batches.

    Raises:
        ValueError: If workers is empty.
    """
    print('dataset_federate')

    if not workers:
        raise ValueError('dataset_federate needs at least one worker')

    # Named remote_datasets so the torchvision `datasets` module is not shadowed.
    remote_datasets = []
    data_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size)

    for batch_idx, (data, targets) in enumerate(data_loader):
        worker = workers[batch_idx % len(workers)]
        # .send() returns pointers; the actual tensors now live on the worker.
        remote_data = data.send(worker)
        remote_targets = targets.send(worker)
        remote_datasets.append(sy.BaseDataset(remote_data, remote_targets))

    fed_dataset = sy.FederatedDataset(remote_datasets)
    fed_loader = sy.FederatedDataLoader(fed_dataset, batch_size=batch_size, shuffle=False, drop_last=False)

    return fed_loader

In [0]:
def train(args, model, device, train_loader, optimizer, epoch):
    """Run one pass over a federated loader, training the model where each batch lives.

    For every batch the model is sent to the worker holding the data, one
    optimisation step is taken there, and the model is fetched back.

    Args:
        args: Object providing log_interval and batch_size (used for progress output only).
        model: Model to train; it is updated in place.
        device: Unused here (the .to(device) transfer is disabled); kept for
            call compatibility.
        train_loader: Federated loader yielding (data, target) pairs whose
            data exposes a .location worker.
        optimizer: Optimizer built over the model's parameters.
        epoch: Epoch number, used only in the progress message.

    Returns:
        The same model object that was passed in, so that
        `trained = train(...)` gives the caller a usable model.
    """
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader): # distributed dataset

        model.send(data.location) # send the model to the worker that owns this batch

        #data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        # nll_loss expects log-probabilities from the model
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()

        model.get() # get the updated model back

        if batch_idx % args.log_interval == 0:
            loss = loss.get() # the loss was computed remotely; fetch it for printing
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * args.batch_size, len(train_loader) * args.batch_size, #batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

    print('finished training')
    return model

### Instantiation, hyperparameters, model training

In [8]:
class Arguments():
    """Container for the run's hyperparameters and switches.

    Every value can be overridden by keyword; calling Arguments() with no
    arguments yields the same settings as before.
    """

    def __init__(self, batch_size=64, test_batch_size=1000, epochs=2, lr=0.01,
                 momentum=0.5, no_cuda=False, seed=1, log_interval=10,
                 save_model=False):
        # NOTE(review): train() uses batch_size only for its progress counter,
        # while dataset_federate batches at 32 — confirm the two agree.
        self.batch_size = batch_size
        self.test_batch_size = test_batch_size
        self.epochs = epochs
        self.lr = lr              # learning rate handed to the optimizer
        self.momentum = momentum  # not passed to the optimizer in this notebook
        self.no_cuda = no_cuda    # True disables CUDA even when it is available
        self.seed = seed          # fed to torch.manual_seed
        self.log_interval = log_interval  # print progress every N batches
        self.save_model = save_model

args = Arguments()

# CUDA is used only when it is available and not switched off via args.no_cuda
use_cuda = torch.cuda.is_available() and not args.no_cuda
print(torch.cuda.is_available())

# Seed torch's RNG so runs are repeatable
torch.manual_seed(args.seed)

if use_cuda:
    device = torch.device("cuda")
    kwargs = {'num_workers': 1, 'pin_memory': True}
else:
    device = torch.device("cpu")
    kwargs = {}

# Pre-trained DenseNet-161 backbone
model = models.densenet161(pretrained=True)

# Plain SGD: args.momentum is not passed (original note: momentum is not supported yet)
optimizer = optim.SGD(model.parameters(), lr=args.lr)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=4)



True


In [0]:
# Fine-tune the whole network: every parameter receives gradients
for param in model.parameters():
    param.requires_grad = True

fc_in = model.classifier.in_features

# Replacement head with 102 outputs. train() computes F.nll_loss, which expects
# log-probabilities, so the head ends in LogSoftmax instead of raw Linear logits.
transferclassifier = nn.Sequential(
                        nn.BatchNorm1d(fc_in),
                        nn.Linear(fc_in, 102),
                        nn.LogSoftmax(dim=1)
                        )

#transfermodel.fc = transferclassifier # resnet
model.classifier = transferclassifier

# The optimizer was built before this head existed, so it did not hold the new
# BatchNorm/Linear parameters. Rebuild it (and the scheduler bound to it) so the
# new head is actually trained.
optimizer = optim.SGD(model.parameters(), lr=args.lr)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience = 4)



In [0]:
# Hook torch for PySyft, then create the three virtual workers
hook = sy.TorchHook(torch)

ada, bob, cyd = (sy.VirtualWorker(hook, worker_id) for worker_id in ('ada', 'bob', 'cyd'))

In [11]:
# Build the datasets, then distribute the training set over the workers;
# dataset_federate returns a federated PySyft loader
loaded_sets = create_loaders('Densenet')
my_trainset, my_validset, _ = loaded_sets
train_loader = dataset_federate(my_trainset, (ada, bob, cyd))
#valid_loader = dataset_federate(my_validset, (ada,bob,cyd))

returning datasets
dataset_federate


### Start the training

In [12]:
print(f'objects of ada= {len(ada._objects)}, bob= {len(bob._objects)}, cyd= {len(cyd._objects)}')
print(device)
epochs = args.epochs
#model.to(device)
##### START THE TRAINING ####
# train() performs a single pass and takes the epoch *number*; passing the epoch
# count ran one pass labelled with that count, so loop over the epochs instead.
for epoch in range(1, epochs + 1):
    train(args, model, device, train_loader, optimizer, epoch)

# train() updates `model` in place, so the trained model is the same object.
trainedmodel = model

objects of ada= 2, bob= 2, cyd= 2
cuda


TypeError: ignored

### Clear the workers

In [0]:
#ada.clear_objects()
#bob.clear_objects()
#cyd.clear_objects()