In [12]:
import torch
from torch import nn
from torchvision import datasets
import fastai 
from torchvision.transforms import ToTensor
# from fastai.data.core import DataLoader
from torch.utils.data import DataLoader
from fastai.data.core import DataLoaders
from fastai.callback.core import Callback
from fastai.vision.all import Learner, Metric
from fastai import optimizer
import torch.nn.functional as F
from torch.utils.data import Subset


In [2]:
# Build both MNIST splits (train first, then test) under the local "data"
# root, each with download=True and its own ToTensor transform.
training_data, test_data = (
    datasets.MNIST(root="data", train=is_train, download=True, transform=ToTensor())
    for is_train in (True, False)
)

In [3]:
# Number of samples per mini-batch for the loaders built below.
batch_size = 256

# One DataLoader per MNIST split, both batching `batch_size` samples at a time.
train_dataloader, test_dataloader = (
    DataLoader(split, batch_size=batch_size)
    for split in (training_data, test_data)
)

# Look at the first test batch only, to report tensor shapes and the label dtype.
for X, y in test_dataloader:
    print(f"Shape of X [N, C, H, W]: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")
    break

Shape of X [N, C, H, W]: torch.Size([256, 1, 28, 28])
Shape of y: torch.Size([256]) torch.int64


In [4]:
# Pick the compute backend in priority order: CUDA, then MPS, then CPU.
# The MPS check is only evaluated when CUDA is unavailable.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

Using cuda device


In [13]:
num_clients = 5

# Size of the full training set. It must be defined in this cell: the
# permutation and the slicing below both depend on it, and leaving it
# commented out makes the cell fail with NameError on a fresh kernel.
train_size = len(training_data)

# Seed the global torch RNG so the shuffled split is reproducible.
# (torch.random.manual_seed is the same function as torch.manual_seed, so a
# single call is enough.)
RANDOM_SEED = 42
torch.manual_seed(RANDOM_SEED)
indices = torch.randperm(train_size).tolist()

# Hand each client a contiguous slice of the shuffled indices. Slices are
# `subset_size` long; the last client also takes the remainder when
# `train_size` is not divisible by `num_clients`.
subset_size = train_size // num_clients
client_subsets = []
for i in range(num_clients):
    start_idx = i * subset_size
    end_idx = train_size if i == num_clients - 1 else start_idx + subset_size

    subset_indices = indices[start_idx:end_idx]
    client_subsets.append(Subset(training_data, subset_indices))

# Sanity check: the client subset sizes add up to the full training set.
assert sum(len(sub) for sub in client_subsets) == train_size

# One shuffling DataLoader per client subset.
client_loaders = [DataLoader(sub, batch_size=batch_size, shuffle=True) for sub in client_subsets]

In [33]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier: flatten the input, then apply three linear layers.

    The two hidden layers are 512 units wide and each is followed by a ReLU;
    the final layer emits 10 raw scores (logits) per sample.
    """

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        hidden_width = 512
        # Layers are created in forward order so parameter names and
        # initialization order match the Sequential indices 0..4.
        layers = [
            nn.Linear(28 * 28, hidden_width),
            nn.ReLU(),
            nn.Linear(hidden_width, hidden_width),
            nn.ReLU(),
            nn.Linear(hidden_width, 10),
        ]
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Return the logits for batch `x` after flattening its non-batch dimensions."""
        return self.linear_relu_stack(self.flatten(x))

# Build the network, move it to `device`, and display the module summary.
model = NeuralNetwork()
model = model.to(device)
model

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)

In [43]:
# We now have 5 client datasets whose underlying data distributions are unknown, i.e., we have no
# statistical information about the data held by each client.
# Next we need to implement variants of three protocols: the encoding protocol, the communication protocol, and the decoding protocol.


# Encoders
def variable_size_encoder(grad_vectors):
    """Placeholder for the variable-size encoder; not implemented yet.

    `grad_vectors` is currently ignored and the function returns None.
    """
    return None

def fixed_size_encoder(grad_vectors):
    """Placeholder for the fixed-size encoder; not implemented yet.

    `grad_vectors` is currently ignored and the function returns None.
    """
    return None

# Decoders 
def averaging_decoder(grad_vectors_list):
    """Decode by averaging: return the mean over the leading dimension.

    Args:
        grad_vectors_list: Either a list or tuple of equally shaped tensors,
            or a single tensor whose dim 0 indexes the vectors to average.

    Returns:
        A tensor holding the element-wise mean, with dim 0 reduced away.
    """
    # torch.mean needs a tensor, so a plain Python sequence of tensors is
    # stacked along a new leading dimension first. Tuples are accepted as
    # well as lists; previously a tuple fell through and failed in torch.mean.
    if isinstance(grad_vectors_list, (list, tuple)):
        grad_vectors_list = torch.stack(grad_vectors_list, dim=0)
    return torch.mean(grad_vectors_list, dim=0)

# Communication protocols
def sparse_for_variable_size_encoder(grad_vectors):
    """Placeholder for the sparse communication protocol paired with the variable-size encoder.

    Not implemented yet: `grad_vectors` is ignored and the function returns None.
    """
    return None

def sparse_for_fixed_size_encoder(grad_vectors):
    """Placeholder for the sparse communication protocol paired with the fixed-size encoder.

    Not implemented yet: `grad_vectors` is ignored and the function returns None.
    """
    return None

In [44]:
# Quick check of averaging_decoder on a 3x3 float32 matrix: the mean is taken over dim 0.
a = torch.tensor(
    [[1, 2, 3], [2, 3, 4], [4, 5, 6]],
    dtype=torch.float32,
)
averaging_decoder(a)

tensor([2.3333, 3.3333, 4.3333])

In [41]:
# Materialize the model's parameter iterator into a list and display it.
parameters = [*model.parameters()]
parameters

[Parameter containing:
 tensor([[ 0.0110, -0.0096, -0.0194,  ...,  0.0244, -0.0186,  0.0118],
         [-0.0061,  0.0233, -0.0160,  ...,  0.0348, -0.0215,  0.0044],
         [-0.0071,  0.0019,  0.0007,  ...,  0.0242,  0.0239, -0.0050],
         ...,
         [ 0.0069, -0.0221, -0.0245,  ...,  0.0273,  0.0008, -0.0178],
         [-0.0122,  0.0277, -0.0271,  ...,  0.0178, -0.0129, -0.0095],
         [-0.0286, -0.0308, -0.0018,  ..., -0.0169,  0.0100, -0.0270]],
        device='cuda:0', requires_grad=True),
 Parameter containing:
 tensor([-3.3647e-02, -1.2088e-02, -2.7336e-03,  3.4404e-02,  1.6939e-02,
          6.6910e-03,  2.4960e-02,  3.4645e-02,  3.1078e-02, -3.3355e-02,
          2.3618e-02,  1.1166e-02, -9.6607e-03, -3.2638e-02,  1.7812e-03,
         -6.3383e-03, -1.6643e-02, -6.4174e-03, -1.7991e-02, -3.5636e-02,
         -1.5705e-02,  3.0728e-02,  2.2389e-02,  3.8432e-05,  3.2346e-02,
         -2.2608e-02,  2.6099e-02, -3.3211e-03,  1.2657e-02, -2.7012e-02,
          3.2372e-02,  

In [42]:
# Start with an empty list of models (presumably one per client is added later — TODO confirm).
models = list()


6