# Tutorial - Vertically Partitioned Split Learning

To demonstrate this structure, the model for MNIST is split vertically into the number of parts given by the integer assigned to the variable 'partition'.

Each image has dimensions [28 x 28]. The dataset is loaded in batches, in this case batches of 64 images. Each batch is then flattened to [64 x 784], where each row is one image and each column is one feature of that image. Finally, we split these 784 feature columns into groups, so that each group acts as a separate dataset containing a unique subset of every image's features.



In [0]:
import syft, torch
from torch import nn, optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

Create hook and virtual workers - alice and bob

In [0]:
# Hook torch through PySyft, then create the two virtual workers used below.
hook = syft.TorchHook(torch)
alice, bob = (
    syft.VirtualWorker(hook, id=worker_id) for worker_id in ('alice', 'bob')
)

# Creating a class SplitNN

Create a class SplitNN containing all the functionalities.

In [0]:
class SplitNN(nn.Module):
    """Vertically partitioned split network.

    ``models`` holds one "bottom" model per feature partition followed by a
    single "top" model as its last entry. The bottom outputs are concatenated
    column-wise, moved to the top model's location if necessary, and fed to
    the top model. Gradients are carried back across that boundary manually
    by :meth:`backward`.

    Args:
        models: list of ``partition`` bottom models plus one top model.
        optimizers: iterable of optimizers, all of which are zeroed and
            stepped together.
        partition: number of bottom models (feature partitions).
    """

    def __init__(self, models, optimizers, partition):
        super().__init__()
        self.models = models
        self.optimizers = optimizers
        # One slot per bottom model; overwritten on every forward pass.
        self.output = [None] * partition
        # Detached input of the top model; set by forward(), read by backward().
        self.second_layer_inp = None

    @staticmethod
    def _column_bounds(widths):
        """Return consecutive ``(start, stop)`` column ranges for ``widths``."""
        bounds = []
        start = 0
        for width in widths:
            bounds.append((start, start + width))
            start += width
        return bounds

    def zero_grads(self):
        """Reset the gradients held by every optimizer."""
        for opt in self.optimizers:
            opt.zero_grad()

    def forward(self, x):
        """Run the bottom models, then the top model, and return its output.

        Args:
            x: list with one batch of features per bottom model, in the same
                order as the bottom models.

        Raises:
            ValueError: if ``x`` does not hold exactly one entry per partition
                (otherwise stale outputs from a previous call would be reused).
        """
        if len(x) != len(self.output):
            raise ValueError(
                f"expected {len(self.output)} partitioned inputs, got {len(x)}"
            )

        # zip stops after the bottom models because x has one entry per partition.
        for i, (model, part) in enumerate(zip(self.models, x)):
            self.output[i] = model(part)

        # Concatenate the outputs of the bottom models (at their location).
        total_out = torch.cat(tuple(self.output), dim=1)

        # Detach so the top model starts a fresh graph; the gradient w.r.t. this
        # tensor is handed back to the bottom models in backward().
        if self.output[-1].location == self.models[-1].location:
            second_layer_inp = total_out.detach().requires_grad_()
        else:
            second_layer_inp = total_out.detach().move(self.models[-1].location).requires_grad_()

        self.second_layer_inp = second_layer_inp
        pred = self.models[-1](second_layer_inp)
        return pred

    def backward(self):
        """Propagate the top model's input gradient into the bottom models.

        Expects ``second_layer_inp.grad`` to be populated already, i.e. the
        loss must have been back-propagated through the top model first.

        Raises:
            RuntimeError: if called before :meth:`forward`.
        """
        second_layer_inp = self.second_layer_inp
        if second_layer_inp is None:
            raise RuntimeError("backward() called before forward()")

        if self.output[-1].location == second_layer_inp.location:
            grad = second_layer_inp.grad.copy()
        else:
            grad = second_layer_inp.grad.copy().move(self.output[-1].location)

        # Each bottom output receives the gradient columns it contributed to
        # the concatenation, so the split follows each output's own width
        # instead of relying on module-level globals.
        widths = [out.shape[1] for out in self.output]
        for out, (start, stop) in zip(self.output, self._column_bounds(widths)):
            out.backward(grad[:, start:stop])

    def step(self):
        """Apply one update with every optimizer."""
        for opt in self.optimizers:
            opt.step()

The function create_models creates one model for each partition of the bottom part (alice's models).
  
First we loop one time fewer than the number of partitions, because all of those models share the same structure. The number of features may not divide evenly by the number of partitions, so the last partition may not have the same number of features as the others. For this we calculate the remaining number of features and create the last model accordingly.

Then the model for bob's machine, which holds the labels, is created.

In [0]:
def create_models(partition, input_size, hidden_sizes, output_size):
    """Build the bottom models (one per partition) and the top model.

    The first ``partition - 1`` bottom models each take
    ``input_size // partition`` input features; the last bottom model takes
    all remaining features, so the widths always add up to ``input_size``.

    Args:
        partition: number of feature partitions (bottom models); must be >= 1.
        input_size: total number of input features.
        hidden_sizes: sequence with at least three widths. Entries 0 and 1 are
            the hidden and output widths of every bottom model, entry 2 is the
            hidden width of the top model.
        output_size: number of outputs of the top model.

    Returns:
        A list of ``partition`` bottom models followed by the top model.

    Raises:
        ValueError: if ``partition`` is smaller than 1.
    """
    if partition < 1:
        raise ValueError("partition must be a positive integer")

    def bottom_model(in_features):
        return nn.Sequential(nn.Linear(in_features, hidden_sizes[0]),
                             nn.ReLU(),
                             nn.Linear(hidden_sizes[0], hidden_sizes[1]),
                             nn.ReLU())

    # Floor first, then multiply: the remainder must be what is left after
    # partition - 1 equal-width slices, not the floor of a fractional product.
    chunk = input_size // partition
    rem = input_size - chunk * (partition - 1)

    models = [bottom_model(chunk) for _ in range(partition - 1)]
    models.append(bottom_model(rem))

    # The top model consumes the concatenated outputs of all bottom models.
    models.append(nn.Sequential(nn.Linear(hidden_sizes[1] * partition, hidden_sizes[2]),
                                nn.ReLU(),
                                nn.Linear(hidden_sizes[2], output_size),
                                nn.LogSoftmax(dim=1)))
    return models

Assign an integer to the partition variable indicating number of partitions available.

In [0]:
# Number of vertical partitions: the feature columns are divided into this many
# groups, and create_models builds one bottom model per group.
partition = 3

Load the dataset.

In [0]:
# Convert each image to a tensor, then normalize with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]
)

# Training split of MNIST, downloaded on first use, served in shuffled batches of 64.
trainset = datasets.MNIST(
    root='/content/sample_data/mnist',
    train=True,
    transform=transform,
    download=True,
)
trainloader = DataLoader(dataset=trainset, batch_size=64, shuffle=True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to /content/sample_data/mnist/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Extracting /content/sample_data/mnist/MNIST/raw/train-images-idx3-ubyte.gz to /content/sample_data/mnist/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to /content/sample_data/mnist/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Extracting /content/sample_data/mnist/MNIST/raw/train-labels-idx1-ubyte.gz to /content/sample_data/mnist/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to /content/sample_data/mnist/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Extracting /content/sample_data/mnist/MNIST/raw/t10k-images-idx3-ubyte.gz to /content/sample_data/mnist/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to /content/sample_data/mnist/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Extracting /content/sample_data/mnist/MNIST/raw/t10k-labels-idx1-ubyte.gz to /content/sample_data/mnist/MNIST/raw
Processing...
Done!


Initialize the sizes.

In [0]:
# Number of features per flattened image (28 x 28 pixels).
input_size = 28 * 28
# Layer widths handed to create_models: bottom hidden, bottom output, top hidden.
hidden_sizes = [128, 256, 512]
# Size of the top model's final layer.
output_size = 10

Partition the data as needed to imitate that we have different datasets having different features for our image.

In [0]:
# One list per feature partition, plus the labels of every batch.
image_set1, image_set2, image_set3 = [], [], []
labels = []

# Number of feature columns in each of the first two partitions.
distr = input_size // partition

for image, label in trainloader:
    # Flatten the batch to [batch x features] and split its columns, so that
    # each list receives a unique group of features for the same images
    # (one image per row).
    image = image.view(image.size(0), -1)

    labels.append(label)
    image_set1.append(image[:, :distr])
    image_set2.append(image[:, distr:2 * distr])
    image_set3.append(image[:, 2 * distr:])

Here we assign output from create_models to a variable models.

Now we create a separate optimizer for each of these models.

In [0]:
# Bottom models (one per partition) followed by the top model.
models = create_models(
    partition=partition,
    input_size=input_size,
    hidden_sizes=hidden_sizes,
    output_size=output_size,
)

# Every model gets its own SGD optimizer over its own parameters.
optimizers = []
for model in models:
    optimizers.append(optim.SGD(model.parameters(), lr=0.01))

Now build the list of model locations. All the partitioned models are located on alice's machine, and the model with the labels is on bob's machine.

After this, send each model to its location.

In [0]:
# One location per model: alice for each partitioned model, bob for the last one.
model_locations = [alice] * partition + [bob]

# Send every model to its worker.
for location, model in zip(model_locations, models):
    model.send(location)

Create an object of class SplitNN.

In [0]:
# Bundle the models and their optimizers into the SplitNN object used for training.
splitNN = SplitNN(models, optimizers, partition)

Define a train function.

In [0]:
def train(x, target, splitnn):
    """Run one training step and return the loss.

    Args:
        x: input passed unchanged to ``splitnn.forward``.
        target: labels compared against the prediction with NLL loss.
        splitnn: object providing ``zero_grads``, ``forward``, ``backward``
            and ``step``.

    Returns:
        The NLL loss of this step.
    """
    # Start from clean gradients.
    splitnn.zero_grads()

    prediction = splitnn.forward(x)
    loss = nn.NLLLoss()(prediction, target)

    # Back-propagate the loss first, then let splitnn run its own backward pass.
    loss.backward()
    splitnn.backward()

    splitnn.step()
    return loss

Since we have 3 partitions, we iterate in parallel over the three lists that were built by splitting the batches from trainloader. Each batch is sent to its respective machine, i.e. the images to alice's machine and the labels to bob's machine.

In [0]:
epochs = 20

# Feature slices go to the worker of the first model, labels to the worker of the last model.
data_worker = models[0].location
label_worker = models[-1].location

for i in range(epochs):
    total_loss = 0
    for x1, x2, x3, y in zip(image_set1, image_set2, image_set3, labels):
        parts = [part.send(data_worker) for part in (x1, x2, x3)]
        y = y.send(label_worker)
        loss = train(parts, y, splitNN)
        # Fetch the loss back from its worker before accumulating it.
        total_loss += loss.get()

    # Average loss over all batches of this epoch.
    print(f"Epoch: {i+1}... Training Loss: {total_loss/len(image_set1)}")

Epoch: 1... Training Loss: 1.5759333372116089
Epoch: 2... Training Loss: 0.46538984775543213
Epoch: 3... Training Loss: 0.35638368129730225
Epoch: 4... Training Loss: 0.3079238533973694
Epoch: 5... Training Loss: 0.2724384367465973
Epoch: 6... Training Loss: 0.24428021907806396
Epoch: 7... Training Loss: 0.22112241387367249
Epoch: 8... Training Loss: 0.2017352432012558
Epoch: 9... Training Loss: 0.1853802353143692
Epoch: 10... Training Loss: 0.17141534388065338
Epoch: 11... Training Loss: 0.15943169593811035
Epoch: 12... Training Loss: 0.14898844063282013
Epoch: 13... Training Loss: 0.1397523432970047
Epoch: 14... Training Loss: 0.1314735859632492
Epoch: 15... Training Loss: 0.12397981435060501
Epoch: 16... Training Loss: 0.11719723790884018
Epoch: 17... Training Loss: 0.11096645891666412
Epoch: 18... Training Loss: 0.10527816414833069
Epoch: 19... Training Loss: 0.10002068430185318
Epoch: 20... Training Loss: 0.09515957534313202
