In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Dataset
import copy
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import networkx as nx

In [2]:
# Graph implementation
def generate_graph(cluster_sizes=[100,100], pin=0.5, pout=0.01, seed=0):
    """Generate a random connected graph"""
    probs = np.array([[pin, pout],[pout, pin]])
    while True:
        g = nx.stochastic_block_model(cluster_sizes, probs)
        if nx.algorithms.components.is_connected(g):
            return g


cluster_sizes = [10, 10]
pin = 0.5
pout = 0.01
seed = 0
        
G = generate_graph(cluster_sizes, pin, pout, seed)

#nx.draw(G, with_labels=True, node_size=100, alpha=1, linewidths=10)
#plt.show()

In [3]:
# Metropolis weights 
number_nodes = G.number_of_nodes()
weights = np.zeros([number_nodes, number_nodes])
for edge in G.edges():
  i, j = edge[0], edge[1]
  weights[i - 1][j - 1] = 1 / (1 + np.max([G.degree(i), G.degree(j)]))
  weights[j - 1][i - 1] = weights[i - 1][j - 1]

print(weights)

weights = weights + np.diag(1 - np.sum(weights, axis=0))

metropolis_weights = weights
print(metropolis_weights)


[[0.         0.14285714 0.14285714 0.14285714 0.         0.14285714
  0.14285714 0.         0.14285714 0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.        ]
 [0.14285714 0.         0.         0.         0.14285714 0.14285714
  0.         0.14285714 0.14285714 0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.14285714]
 [0.14285714 0.         0.         0.         0.         0.
  0.16666667 0.         0.         0.2        0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.        ]
 [0.14285714 0.         0.         0.         0.16666667 0.14285714
  0.         0.         0.14285714 0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.16666667]
 [0.         0.14285714 0.         0.16666667 0.         0.14285714
  0.16666667 0.         0.         0.         0.         0.
  0.         0.         0.  

In [4]:
T = 100
C = 0.1
K = 6
E = 1
eta = 0.01
mu = 1
B = 10
B_test = 128

In [None]:
def load_dataset():
    transforms_mnist = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,),(0.3081,))])
    mnist_data_train = datasets.MNIST('./data/mnist', train=True, download=True, transform=transforms_mnist)
    mnist_data_test = datasets.MNIST('./data/mnist', train=False, download=True, transform=transforms_mnist)

    return mnist_data_train, mnist_data_test

In [5]:
def degrees(A):
    """Return the degrees of each node of a graph from its adjacency matrix"""
    return np.sum(A, axis=0).reshape(A.shape[0], 1)

def node_degree(n, G):
    cnt = 0
    for i in G.neighbors(n):
        cnt += 1
    return cnt

def get_neighbors(n, G):
    neighbors_list = []
    for i in G.neighbors(n):
        neighbors_list.append(int(i))
    return neighbors_list

In [13]:
datapoints = {}
count = 0
W1 = np.array([2, 2])
W2 = np.array([-2, 2])
W = [W1, W2]
m = 50
n = 2
noise_sd = 0.01
for i, cluster_size in enumerate(cluster_sizes):
    for j in range(cluster_size):
        features = np.random.normal(loc=0.0, scale=1.0, size=(m, n))
        label = np.dot(features, W[i]) + np.random.normal(0,noise_sd)
        datapoints[count] = {
                'features': features,
                'degree': node_degree(count, G),
                'label': label,
                'neighbors': get_neighbors(count, G)
            }
        count += 1

In [14]:
class MyDataset(Dataset):
    def __init__(self, data, targets, transform=None):
        self.data = torch.FloatTensor(data)
        self.targets = torch.FloatTensor(targets)
        
    def __getitem__(self, index):
        x = self.data[index]
        y = self.targets[index]

        return x, y
    
    def __len__(self):
        return len(self.data)


In [24]:
class MLP_Net(nn.Module):
    def __init__(self):
        super(MLP_Net, self).__init__()
        self.fc1 = nn.Linear(2, 4, bias=False)
        self.fc2 = nn.Linear(4, 1, bias=False)
        #self.fc3 = nn.Linear(200, 10)

    def forward(self, x):
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        output = self.fc2(x)
        #output = self.fc3(x)
        return output

In [25]:
class CNN_Net(nn.Module):
    def __init__(self):
        super(CNN_Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5)
        self.pool = nn.MaxPool2d(2,2)
        self.dropout = nn.Dropout(p=0.2)
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.dropout(x)
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        output = self.fc2(x)
        return output

In [26]:
class ClientUpdate(object):
    def __init__(self, dataset, batchSize, learning_rate, epochs, idxs):
        self.train_loader = DataLoader(CustomDataset(dataset, idxs), batch_size=batchSize, shuffle=True)
        self.learning_rate = learning_rate
        self.epochs = epochs

    def train(self, model):
        criterion = nn.MSELoss()
        optimizer = torch.optim.SGD(model.parameters(), lr=self.learning_rate, momentum=0.5)

        e_loss = []
        for epoch in range(1, self.epochs+1):
            train_loss = 0
            model.train()
            for data, labels in self.train_loader:
                data, labels = data.cuda(), labels.cuda()
                optimizer.zero_grad() 
                output = model(data)  
                loss = criterion(output, labels)
                #loss += mu/2 * torch.norm(client_param.data - server_param.data)**2
                loss.backward()
                optimizer.step() 
                train_loss += loss.item()*data.size(0) 

            train_loss = train_loss/len(self.train_loader.dataset) 
            e_loss.append(train_loss)

        total_loss = sum(e_loss)/len(e_loss)

        return model.state_dict(), total_loss

In [27]:
def testing(model, dataset, bs, criterion): 
    test_loss = 0
    correct = 0
    test_loader = DataLoader(dataset, batch_size=bs)
    l = len(test_loader)
    model.eval()
    for data, labels in test_loader:
        data, labels = data.cuda(), labels.cuda()
        output = model(data)
        loss = criterion(output, labels)
        test_loss += loss.item()*data.size(0)
        #_, pred = torch.max(output, 1)
        #correct += pred.eq(labels.data.view_as(pred)).sum().item()
    
    test_loss /= len(test_loader.dataset)
    test_accuracy = 100. * correct / len(test_loader.dataset)
    
    return test_loss

In [50]:
model = MLP_Net()

from torch.nn.utils import parameters_to_vector, vector_to_parameters

with torch.no_grad():    
    params = parameters_to_vector(model.parameters())

    print(params)

params *= 2.

vector_to_parameters(parameters=model.parameters(), vec=params)

parameters_to_vector(model.parameters())





tensor([ 0.6481,  0.3780, -0.4846, -0.0346, -0.1612, -0.5344, -0.6142, -0.1854,
        -0.1454,  0.3943,  0.3717,  0.2678])


tensor([ 1.2962,  0.7561, -0.9692, -0.0693, -0.3224, -1.0687, -1.2284, -0.3709,
        -0.2908,  0.7885,  0.7435,  0.5356], grad_fn=<CatBackward0>)

In [47]:
iid_dict = iid_partition(mnist_data_train, K)
#global_model = CNN_Net().cuda()
models = [MLP_Net().cuda() for i in range(K)]
dummy_models = [MLP_Net().cuda() for i in range(K)]

#model.load_state_dict(global_model.state_dict())

criterion = nn.MSELoss()


train_loss = []
test_loss = []
test_accuracy = []


for curr_round in tqdm(range(1, T+1)):
    w, local_loss = [], []
    m = max(int(C*K), 1)
    
    for k in range(K):
        dummy_models[i].load_state_dict(models[i].state_dict())
        local_update = ClientUpdate(dataset=ds, batchSize=B, learning_rate=eta, epochs=E, idxs=data_dict[k])
        weights, loss = local_update.train(dummy_models[i])
        w.append(weights)
        local_loss.append(loss)
    
    
    weights = copy.deepcopy(w[0]) # to iterate over the keys
    weights_sum = copy.deepcopy(w)
    for k in weights.keys():
      for i in range(K): # to iterate over the model
        weights_sum[i][k] = weights_sum[i][k] - weights_sum[i][k]
        for j in range(K): # to iterate over the neighbors
          weights_sum[i][k] += metropolis_weights[i][j] * w[j][k]



          
            

    local_test_acc = []
    local_test_loss = []
    for k in range(K):
      models[k].load_state_dict(weights_sum[k])
      g_loss, g_accuracy = testing(models[i], mnist_data_test, B_test, criterion)
      local_test_acc.append(g_accuracy)
      local_test_loss.append(g_loss)
        
    loss_avg = sum(local_loss) / len(local_loss)
    train_loss.append(loss_avg)

    g_loss = sum(local_test_loss) / len(local_test_loss)
    g_accuracy = sum(local_test_acc) / len(local_test_acc)
    
    

    test_loss.append(g_loss)
    test_accuracy.append(g_accuracy)
    print("Testint_acc %2.5f , Testing_loss %2.5f"% (test_accuracy[-1], test_loss[-1]))

NameError: name 'iid_partition' is not defined

In [None]:
fig, ax = plt.subplots()
x_axis = np.arange(1, len(train_loss)+1)
y_axis = np.array(train_loss)
ax.plot(x_axis, y_axis, label = "train loss")

y_axis = np.array(test_loss)
ax.plot(x_axis, y_axis, label = "test loss")

ax.set(xlabel='Number of Rounds', ylabel='Loss')
ax.legend()
ax.grid()

In [None]:
fig, ax = plt.subplots()
x_axis = np.arange(1, T+1)
y_axis = np.array(test_accuracy)
ax.plot(x_axis, y_axis)

ax.set(xlabel='Number of Rounds', ylabel='Test Accuracy')
ax.grid()