In [91]:
# Importing the MNIST dataset to work on

import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

data_path_str = "./data"
ETA = "\N{GREEK SMALL LETTER ETA}"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.backends.cudnn.deterministic=True

transform = transforms.Compose([
    transforms.ToTensor(),
    # normalize by training set mean and standard deviation
    # resulting data has mean=0 and std=1
    transforms.Normalize((0.1307,), (0.3081,))
])

train_dataset = datasets.MNIST(data_path_str, train=True, download=True, transform=transform)
test_loader = DataLoader(
    datasets.MNIST(data_path_str, train=False, download=False, transform=transform),
    # decrease batch size if running into memory issues when testing
    # a bespoke generator is passed to avoid reproducibility issues
    shuffle=False, drop_last=False, batch_size=10000, generator=torch.Generator())

device

device(type='cpu')

In [92]:
# Partitioning data (each image into 4 parts)

data1= torch.stack([a[0:7] for a in train_dataset.data])
data2= torch.stack([a[8:14] for a in train_dataset.data])
data3= torch.stack([a[15:21] for a in train_dataset.data])
data4= torch.stack([a[22:28] for a in train_dataset.data])


In [93]:
# Test dataset

test_dataset= [test_loader.dataset[i][0] for i in range(len(test_loader.dataset))]
test_labels= [test_loader.dataset[i][1] for i in range(len(test_loader.dataset))]

In [94]:
# Creating label owner split
from typing import cast

import numpy as np
import numpy.random as npr
from torch.utils.data import Subset

def split(nr_clients: int, seed: int) -> list[Subset]:
    rng = npr.default_rng(seed)
    indices= rng.permutation(len(train_dataset))
    splits = np.array_split(indices, nr_clients)

    return [Subset(train_dataset, split) for split in cast(list[list[int]], splits)], indices

In [95]:
# Creating label split
sample_split, sample_ids= split(5, 42)
label_owner1= sample_split[0]
label_id1= sample_ids[0:12000]

# Aligning the data across each of the owners and label owner 1
# Retrieving data corresponding to which labels are with label owner 1

labels1= [label_owner1[i][1] for i in range(len(label_owner1))]
dataA_label1= torch.stack([data1[i] for i in label_id1])
dataB_label1= torch.stack([data2[i] for i in label_id1])
dataC_label1= torch.stack([data3[i] for i in label_id1])
dataD_label1= torch.stack([data4[i] for i in label_id1])
data_labels1= [dataA_label1, dataB_label1, dataC_label1, dataD_label1]

In [96]:
from pathlib import Path

import pandas as pd
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler

# Data owner neural network

import torch.nn as nn
import torch.nn.functional as F

class BottomModel(nn.Module):
    def __init__(self, in_feat, out_feat):
        super(BottomModel, self).__init__()
        self.local_out_dim = out_feat
        self.conv= nn.Conv2d(in_feat, 32, 3, 1)
    
    def forward(self, x):
        x= self.conv(x)
        x= F.ReLU(x)
        x= self.drop(x)
        return x

In [97]:
# Label owner neural network

class TopModel(nn.Module):
    def __init__(self, local_models, n_outs):
        super(TopModel, self).__init__()
        self.in_size = sum([local_models[i].local_out_dim for i in range(len(local_models))])
        self.conv = nn.Conv2d(self.in_size, 128, 3, 1)
        self.lin1 = nn.Linear(128, 256)
        self.lin2 = nn.Linear(256, 10) # Final output = number of possible classes (10 digit types)
        self.act = nn.LeakyReLU()
        self.dropout = nn.Dropout(0.1)

    def forward(self, x):
        concat_outs = torch.cat(x, dim=1)  # concatenate local model outputs before forward pass
        x = self.act(self.conv(concat_outs))
        x = self.act(self.lin1(x))
        x = self.act(self.lin2(x))
        return self.dropout(x)

In [98]:
class VFLNetwork(nn.Module):
    def __init__(self, local_models, n_outs):
        super(VFLNetwork, self).__init__()
        self.bottom_models = local_models
        self.top_model = TopModel(self.bottom_models, n_outs)
        self.optimizer = optim.AdamW(self.parameters())
        self.criterion = nn.CrossEntropyLoss()

    def train_with_settings(self, epochs, batch_sz, x, y):
        num_batches = len(x) // batch_sz if len(x) % batch_sz == 0 else len(x) // batch_sz + 1
        for epoch in range(epochs):
            self.optimizer.zero_grad()
            total_loss = 0.0
            correct = 0.0
            total = 0.0
            for minibatch in range(num_batches):
                if minibatch == num_batches - 1:
                    x_minibatch = [p[int(minibatch * batch_sz):] for p in x]
                    y_minibatch = y[int(minibatch * batch_sz):]
                else:
                    x_minibatch = [p[int(minibatch * batch_sz):int((minibatch + 1) * batch_sz)] for p in x]
                    y_minibatch = y[int(minibatch * batch_sz):int((minibatch + 1) * batch_sz)]

                outs = self.forward(x_minibatch)
                pred = torch.argmax(outs, dim=1)
                actual = torch.argmax(y_minibatch, dim=1)
                correct += torch.sum((pred == actual))
                total += len(actual)
                loss = self.criterion(outs, y_minibatch)
                total_loss += loss
                loss.backward()
                self.optimizer.step()

            print(
                f"Epoch: {epoch} Train accuracy: {correct * 100 / total:.2f}% Loss: {total_loss.detach().numpy()/num_batches:.3f}")

    def forward(self, x):
        local_outs = [self.bottom_models[i](x[i]) for i in range(len(self.bottom_models))]
        return self.top_model(local_outs)

    def test(self, x, y):
        with torch.no_grad():
            outs = self.forward(x)
            preds = torch.argmax(outs, dim=1)
            actual = torch.argmax(y, dim=1)
            accuracy = torch.sum((preds == actual)) / len(actual)
            loss = self.criterion(outs, y)
            return accuracy, loss


In [99]:
if __name__ == "__main__":
    torch.manual_seed(42)
    np.random.seed(42)

    # model architecture hyperparameters
    outs_per_client = 10
    bottom_models = [BottomModel(7, 70)]*4
    final_out_dims = 10
    Network = VFLNetwork(bottom_models, final_out_dims)

    #Training configurations
    EPOCHS = 500
    BATCH_SIZE = 64
    Network.train_with_settings(EPOCHS, BATCH_SIZE, [data1, data2, data3, data4], data_labels1)

    
    accuracy, loss = Network.test(test_dataset, test_labels)
    print(f"Test accuracy: {accuracy * 100:.2f}%")

RuntimeError: Input type (unsigned char) and bias type (float) should be the same