# Distributed Machine Learning as a System-of-Systems (SoS)
---
## Set up
* ### Imports

In [None]:
import torch as th
from torch import nn

import torchvision
from torchvision.datasets import MNIST
import torchvision.transforms as transforms
from torch.distributions.uniform import Uniform
from torch.utils.data import DataLoader

from matplotlib import pyplot as plt
import numpy as np

from src import DatasetManager, Company, SoS

* ### Dataset & shared submodule structure

In [None]:
# Validation data: the MNIST test split (train=False), stored under src/data/
# and downloaded when missing. A dataset is requested from DatasetManager with
# size=1000 and a Uniform(0, 10) distribution, then served in shuffled batches
# of 100 by 3 loader workers.
# NOTE(review): the sampling semantics of get_dataset live in src.DatasetManager
# and are not visible here; confirm there.
mnist_test = MNIST(
    "src/data/",
    train=False,
    transform=transforms.ToTensor(),
    download=True,
)
dtManager = DatasetManager(mnist_test)
dt, _ = dtManager.get_dataset(
    size=1000,
    distribution=Uniform(0, 10),
)
loader_test = DataLoader(dt, batch_size=100, shuffle=True, num_workers=3)

# Training data: MNIST with the default split. `dtManager` is deliberately
# rebound so that later cells draw their datasets from the training images.
mnist_train = MNIST(
    "src/data/",
    transform=transforms.ToTensor(),
    download=True,
)
dtManager = DatasetManager(mnist_train)

class Model(nn.Module):
    """Small convolutional classifier producing 10 log-probabilities.

    Layout: two 3x3 convolutions (1 -> 32 -> 64 channels, stride 1, no
    padding), a 2x2 max-pool, dropout, then two fully connected layers
    (9216 -> 128 -> 10) with a second dropout in between.

    ``fc1`` expects 9216 = 64 * 12 * 12 features, which is what the
    convolution/pooling stack yields for 1x28x28 inputs.
    """

    def __init__(self) -> None:
        super().__init__()
        # Feature extractor.
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        # Regularisation (only active in training mode).
        self.dropout1 = nn.Dropout(0.25)
        self.dropout2 = nn.Dropout(0.5)
        # Classifier head.
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x) -> th.Tensor:
        """Return per-class log-probabilities for a batch of images.

        Args:
            x: Input batch; the first convolution requires 1 channel.

        Returns:
            Tensor with log-softmax applied along dimension 1.
        """
        features = th.relu(self.conv1(x))
        features = th.relu(self.conv2(features))
        features = self.dropout1(th.max_pool2d(features, 2))

        # Flatten everything except the batch dimension before the dense head.
        hidden = th.relu(self.fc1(th.flatten(features, 1)))
        logits = self.fc2(self.dropout2(hidden))
        return th.log_softmax(logits, dim=1)

# Run on the first CUDA GPU when one is available, otherwise on the CPU.
if th.cuda.is_available():
    device = th.device("cuda:0")
else:
    device = th.device("cpu")

Dataset Sample

In [None]:
def imshow(img):
    """Show a 3-D image tensor with matplotlib.

    The tensor is converted to a NumPy array and its axes are reordered
    from (0, 1, 2) to (1, 2, 0) before plotting, i.e. the leading axis is
    moved last (presumably channels-first -> channels-last; confirm against
    the caller). Pixel values are plotted as-is: no un-normalisation is
    applied.

    Args:
        img: Tensor exposing ``.numpy()`` with three dimensions.
    """
    pixels = img.numpy()
    axis0_last = np.transpose(pixels, (1, 2, 0))
    plt.imshow(axis0_last, cmap='gray')
    plt.show()

# Pull a single batch from the validation loader (loader_test) for a visual
# sanity check of the data.
dataiter = iter(loader_test)
images, labels = next(dataiter)

# Tile the batch's images into one grid image and display it.
imshow(torchvision.utils.make_grid(images))

## System-of-systems

### 1. Creation

In [None]:
## Create Companies
# Build `n` companies. Each one receives its own dataset (with the
# distribution returned alongside it by dtManager.get_dataset()) and its own
# freshly initialised copy of the shared model architecture on `device`.
n = 5
companies = []

for i in range(n):
    dt, distribution = dtManager.get_dataset()

    companies.append(
        Company(
            # f-string: `"Company " + i` raised TypeError (str + int).
            f"Company {i}",
            dataset=dt,
            distribution=distribution,
            shared_model=Model().to(device),
            device=device,
        )
    )

# Group all companies into a single system-of-systems.
sos = SoS(companies)

Random distribution of each company's dataset

In [None]:

cnames = sos.get_names()
cdist = sos.get_distribution()

# One bar chart per company: row i of `cdist` is plotted against the column
# indices 0..cdist.shape[1]-1.
# NOTE(review): assumes `cdist` is a 2-D array with one row per entry of
# `cnames`; confirm against SoS.get_distribution().
# Iterate over the names themselves: `range(cnames)` raised TypeError because
# `cnames` is a sequence of names, not an int.
for i, cname in enumerate(cnames):
    plt.bar(range(cdist.shape[1]), cdist[i])
    plt.title("Data Distribution for " + cname)
    plt.grid(True)
    plt.show()

### 2. Partnership Formation

### 3. Distributed Machine Learning