In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Dataset
import copy
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import networkx as nx
from torch.nn.utils import parameters_to_vector, vector_to_parameters

In [46]:
# Graph implementation
def generate_graph(cluster_sizes=[100,100], pin=0.5, pout=0.01, seed=0):
    """Generate a random connected graph"""
    probs = np.array([[pin, pout],[pout, pin]])
    while True:
        g = nx.stochastic_block_model(cluster_sizes, probs)
        if nx.algorithms.components.is_connected(g):
            return g


cluster_sizes = [10, 10]
pin = 0.5
pout = 0.1
seed = 0
alpha = 1e-2
lamda = 1e-3
eta = 1e-2
d0 = 8
no_users = sum(cluster_sizes)
batch_size = 20
epochs = 1
it = 200
G = generate_graph(cluster_sizes, pin, pout, seed)

#nx.draw(G, with_labels=True, node_size=100, alpha=1, linewidths=10)
#plt.show()

In [47]:
# Metropolis weights 
number_nodes = G.number_of_nodes()
weights = np.zeros([number_nodes, number_nodes])
for edge in G.edges():
  i, j = edge[0], edge[1]
  weights[i - 1][j - 1] = 1 / (1 + np.max([G.degree(i), G.degree(j)]))
  weights[j - 1][i - 1] = weights[i - 1][j - 1]

print(weights)

weights = weights + np.diag(1 - np.sum(weights, axis=0))

metropolis_weights = weights
print(metropolis_weights)


[[0.         0.         0.125      0.14285714 0.         0.125
  0.         0.         0.14285714 0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.16666667]
 [0.         0.         0.125      0.14285714 0.         0.125
  0.         0.16666667 0.14285714 0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.        ]
 [0.125      0.125      0.         0.         0.         0.125
  0.125      0.         0.         0.         0.         0.
  0.         0.         0.125      0.         0.125      0.
  0.         0.125     ]
 [0.14285714 0.14285714 0.         0.         0.         0.125
  0.14285714 0.         0.14285714 0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.14285714 0.        ]
 [0.         0.         0.         0.         0.         0.125
  0.         0.         0.         0.14285714 0.         0.
  0.         0.         0.         0.        

In [48]:
def load_dataset():
    transforms_mnist = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,),(0.3081,))])
    mnist_data_train = datasets.MNIST('./data/mnist', train=True, download=True, transform=transforms_mnist)
    mnist_data_test = datasets.MNIST('./data/mnist', train=False, download=True, transform=transforms_mnist)

    return mnist_data_train, mnist_data_test

In [49]:
def degrees(A):
    """Return the degrees of each node of a graph from its adjacency matrix"""
    return np.sum(A, axis=0).reshape(A.shape[0], 1)

def node_degree(n, G):
    cnt = 0
    for i in G.neighbors(n):
        cnt += 1
    return cnt

def get_neighbors(n, G):
    neighbors_list = []
    for i in G.neighbors(n):
        neighbors_list.append(int(i))
    return neighbors_list

In [50]:
datapoints = {}
count = 0
W1 = np.array([2.0, 2.0, 0.0, 5.0, -2.0, -3.0, 5.0, 2.0])
W2 = np.array([2.0, 2.0, 0.0, 5.0, 2.0, 3.0, -5.0, -2.0])
W = [W1, W2]
m = 200
n = 8
noise_sd = 0.001
for i, cluster_size in enumerate(cluster_sizes):
    for j in range(cluster_size):
        features = np.random.normal(loc=0.0, scale=1.0, size=(m, n))
        label = np.dot(features, W[i]) + np.random.normal(0,noise_sd)
        datapoints[count] = {
                'features': features,
                'degree': node_degree(count, G),
                'label': label,
                'neighbors': get_neighbors(count, G),
                'exact_weights': torch.from_numpy(W[i])
            }
        count += 1

In [51]:
class MyDataset(Dataset):
    def __init__(self, data, targets, transform=None):
        self.data = torch.FloatTensor(data)
        self.targets = torch.FloatTensor(targets).unsqueeze(-1)
        
    def __getitem__(self, index):
        x = self.data[index]
        y = self.targets[index]

        return x, y
    
    def __len__(self):
        return len(self.data)


In [52]:
class MLP_Net(nn.Module):
    def __init__(self, user_id):
        super(MLP_Net, self).__init__()
        self.fc1 = nn.Linear(8, 1, bias=False)
        #self.fc2 = nn.Linear(4, 1, bias=False)
        #self.fc3 = nn.Linear(200, 10)
        self.user_id = user_id

    def forward(self, x):
        x = torch.flatten(x, 1)
        #x = F.relu(self.fc1(x))
        output = self.fc1(x)
        #output = self.fc3(x)
        return output

In [53]:
from typing import Iterable, Optional

def grads_to_vector(parameters: Iterable[torch.Tensor]) -> torch.Tensor:
    r"""Convert parameters to one vector

    Args:
        parameters (Iterable[Tensor]): an iterator of Tensors that are the
            parameters of a model.

    Returns:
        The parameters represented by a single vector
    """
    # Flag for the device where the parameter is located
    param_device = None

    vec = []
    for param in parameters:
        # Ensure the parameters are located in the same device
        param_device = param.grad

        vec.append(param_device.view(-1))
    return torch.cat(vec)

In [54]:
model = MLP_Net(user_id=0)

lr = 0.01

dataloader = DataLoader(MyDataset(datapoints[19]["features"], datapoints[19]["label"]), batch_size=50, shuffle=False)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
for i in range(100):
    for (x, y) in dataloader:
        criterion = nn.MSELoss()
        optimizer.zero_grad()
        yhat = model(x)

        loss = criterion(yhat, y)
        
        loss.backward()
        print(i, loss.detach(), parameters_to_vector(model.parameters()).detach())
        #optimizer.step()
        new_model = parameters_to_vector(model.parameters()) - lr * grads_to_vector(model.parameters())
        vector_to_parameters(parameters=model.parameters(), vec=new_model)
        #if i % 50 ==0:
            #lr *= 0.9
            

#parameters_to_vector(model.parameters())

0 tensor(67.7575) tensor([-0.2762,  0.3150, -0.2469, -0.0316, -0.3489,  0.0675, -0.0357, -0.2872])
0 tensor(43.6709) tensor([-0.2384,  0.3568, -0.2781,  0.0811, -0.3566,  0.1175, -0.1236, -0.3307])
0 tensor(71.2480) tensor([-0.1696,  0.3602, -0.2363,  0.1274, -0.3335,  0.1468, -0.1930, -0.3289])
0 tensor(57.2519) tensor([-0.1687,  0.3975, -0.2431,  0.2185, -0.2818,  0.1925, -0.3287, -0.3383])
1 tensor(58.7885) tensor([-0.1393,  0.3980, -0.2538,  0.3296, -0.2605,  0.2443, -0.3907, -0.3797])
1 tensor(38.1779) tensor([-0.1040,  0.4376, -0.2827,  0.4343, -0.2664,  0.2908, -0.4718, -0.4207])
1 tensor(62.0731) tensor([-0.0395,  0.4411, -0.2432,  0.4768, -0.2436,  0.3189, -0.5362, -0.4192])
1 tensor(49.8173) tensor([-0.0384,  0.4765, -0.2490,  0.5614, -0.1940,  0.3614, -0.6626, -0.4279])
2 tensor(51.0360) tensor([-0.0110,  0.4774, -0.2582,  0.6645, -0.1732,  0.4102, -0.7196, -0.4672])
2 tensor(33.3960) tensor([ 0.0219,  0.5149, -0.2849,  0.7616, -0.1775,  0.4535, -0.7946, -0.5058])
2 tensor(5

27 tensor(1.9737) tensor([ 1.5774,  1.6166, -0.1107,  4.3202,  1.3509,  2.4541, -4.3162, -1.6272])
27 tensor(1.5998) tensor([ 1.5788,  1.6250, -0.1100,  4.3329,  1.3642,  2.4608, -4.3369, -1.6286])
28 tensor(1.5415) tensor([ 1.5838,  1.6271, -0.1083,  4.3484,  1.3718,  2.4713, -4.3438, -1.6381])
28 tensor(1.2119) tensor([ 1.5897,  1.6358, -0.1124,  4.3631,  1.3757,  2.4784, -4.3539, -1.6467])
28 tensor(1.7359) tensor([ 1.6019,  1.6375, -0.1050,  4.3670,  1.3843,  2.4862, -4.3626, -1.6475])
28 tensor(1.4092) tensor([ 1.6033,  1.6455, -0.1044,  4.3788,  1.3969,  2.4924, -4.3820, -1.6488])
29 tensor(1.3551) tensor([ 1.6080,  1.6475, -0.1027,  4.3932,  1.4041,  2.5023, -4.3883, -1.6578])
29 tensor(1.0716) tensor([ 1.6135,  1.6558, -0.1065,  4.4069,  1.4078,  2.5090, -4.3978, -1.6659])
29 tensor(1.5271) tensor([ 1.6250,  1.6574, -0.0996,  4.4104,  1.4161,  2.5163, -4.4059, -1.6667])
29 tensor(1.2417) tensor([ 1.6263,  1.6649, -0.0990,  4.4214,  1.4280,  2.5221, -4.4239, -1.6680])
30 tensor(

55 tensor(0.0460) tensor([ 1.9177,  1.9224, -0.0226,  4.9028,  1.8589,  2.8969, -4.8990, -1.9235])
55 tensor(0.0584) tensor([ 1.9201,  1.9228, -0.0213,  4.9030,  1.8610,  2.8987, -4.9003, -1.9239])
55 tensor(0.0497) tensor([ 1.9205,  1.9244, -0.0211,  4.9048,  1.8637,  2.8998, -4.9035, -1.9242])
56 tensor(0.0461) tensor([ 1.9214,  1.9250, -0.0205,  4.9070,  1.8654,  2.9018, -4.9043, -1.9261])
56 tensor(0.0408) tensor([ 1.9224,  1.9267, -0.0212,  4.9092,  1.8667,  2.9029, -4.9056, -1.9278])
56 tensor(0.0516) tensor([ 1.9247,  1.9272, -0.0200,  4.9094,  1.8687,  2.9046, -4.9068, -1.9281])
56 tensor(0.0440) tensor([ 1.9251,  1.9287, -0.0198,  4.9110,  1.8712,  2.9056, -4.9098, -1.9284])
57 tensor(0.0408) tensor([ 1.9259,  1.9293, -0.0192,  4.9131,  1.8729,  2.9075, -4.9105, -1.9302])
57 tensor(0.0362) tensor([ 1.9269,  1.9309, -0.0199,  4.9151,  1.8740,  2.9085, -4.9117, -1.9318])
57 tensor(0.0456) tensor([ 1.9290,  1.9313, -0.0188,  4.9153,  1.8759,  2.9102, -4.9128, -1.9321])
57 tensor(

87 tensor(0.0012) tensor([ 1.9881e+00,  1.9881e+00, -2.6010e-03,  4.9883e+00,  1.9781e+00,
         2.9850e+00, -4.9879e+00, -1.9881e+00])
87 tensor(0.0010) tensor([ 1.9881e+00,  1.9883e+00, -2.5719e-03,  4.9885e+00,  1.9785e+00,
         2.9852e+00, -4.9883e+00, -1.9882e+00])
88 tensor(0.0010) tensor([ 1.9883e+00,  1.9884e+00, -2.4609e-03,  4.9888e+00,  1.9788e+00,
         2.9855e+00, -4.9883e+00, -1.9885e+00])
88 tensor(0.0009) tensor([ 1.9884e+00,  1.9887e+00, -2.5794e-03,  4.9890e+00,  1.9790e+00,
         2.9856e+00, -4.9885e+00, -1.9887e+00])
88 tensor(0.0010) tensor([ 1.9887e+00,  1.9888e+00, -2.4311e-03,  4.9890e+00,  1.9793e+00,
         2.9859e+00, -4.9886e+00, -1.9888e+00])
88 tensor(0.0009) tensor([ 1.9888e+00,  1.9890e+00, -2.4039e-03,  4.9892e+00,  1.9797e+00,
         2.9860e+00, -4.9890e+00, -1.9888e+00])
89 tensor(0.0008) tensor([ 1.9889e+00,  1.9891e+00, -2.2992e-03,  4.9895e+00,  1.9800e+00,
         2.9863e+00, -4.9891e+00, -1.9891e+00])
89 tensor(0.0008) tensor([ 

In [55]:
parameters_to_vector(model.parameters())

tensor([ 1.9943e+00,  1.9943e+00, -1.0834e-03,  4.9948e+00,  1.9895e+00,
         2.9929e+00, -4.9946e+00, -1.9943e+00], grad_fn=<CatBackward0>)

In [56]:
class CNN_Net(nn.Module):
    def __init__(self):
        super(CNN_Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5)
        self.pool = nn.MaxPool2d(2,2)
        self.dropout = nn.Dropout(p=0.2)
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.dropout(x)
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        output = self.fc2(x)
        return output

In [57]:
class ClientUpdate(object):
    def __init__(self, dataset, batchSize, alpha, lamda, epochs, projection_list, projected_weights):
        self.train_loader = DataLoader(MyDataset(dataset["features"], dataset["label"]), batch_size=batchSize, shuffle=True)
        #self.learning_rate = learning_rate
        self.epochs = epochs
        self.batchSize = batchSize

    def train(self, model):
        criterion = nn.MSELoss()
        optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, momentum=0.5)

        e_loss = []
        for epoch in range(1, self.epochs+1):
            train_loss = 0
            model.train()
            for i, (data, labels) in zip(range(1), self.train_loader):
                data, labels = data, labels
                optimizer.zero_grad() 
                output = model(data)  
                loss = criterion(output, labels)
                #loss += mu/2 * torch.norm(client_param.data - server_param.data)**2
                loss.backward()
                grads = grads_to_vector(model.parameters())
                #optimizer.step()
                train_loss += loss.item()*data.size(0)
                weights = parameters_to_vector(model.parameters())
                mat_vec_sum = torch.zeros_like(weights)
                for j in G.neighbors(model.user_id):
                    mat_vec_sum = torch.add(mat_vec_sum, torch.matmul(torch.transpose(projection_list[model.user_id][j], 0, 1), 
                                                         projected_weights[j][model.user_id] - projected_weights[model.user_id][j]))
                
                model_update = parameters_to_vector(model.parameters()) - alpha * (grads + lamda * mat_vec_sum)
                
            vector_to_parameters(parameters=model.parameters(), vec=model_update)
                

            train_loss = train_loss/self.batchSize#len(self.train_loader.dataset) 
            e_loss.append(train_loss)

        total_loss = e_loss#sum(e_loss)/len(e_loss)

        return model.state_dict(), total_loss

In [58]:
# Preparing projection matrices
models = [MLP_Net(user_id=i) for i in range(no_users)]
#temp = MLP_Net()
projection_list = []
projected_weights = []

def update_ProjWeight(projection_list, projected_weights, models, first_run=True):
    for i in range(no_users):
        neighbors_mat = []
        neighbors_weights = []
        for j in range(no_users):
            if j in G.neighbors(i):
                with torch.no_grad():
                    if first_run == True:
                        row, column = d0, parameters_to_vector(models[i].parameters()).size()[0]
                        mat = torch.zeros((row, column))
                        
                        # Generate random values from a normal distribution for the diagonal
                        diag_values = 1.0 + 1.0 * torch.randn(row)

                        # Create a diagonal matrix with random values
                        mat = torch.diag(diag_values)
                        
                        neighbors_mat.append(mat)
                        neighbors_weights.append(torch.matmul(mat, parameters_to_vector(models[i].parameters())))
                    else:
                        neighbors_weights.append(torch.matmul(projection_list[i][j], parameters_to_vector(models[i].parameters())))
            else:
                neighbors_mat.append(0)
                neighbors_weights.append(0)
        if first_run == True:
            projection_list.append(neighbors_mat)
        projected_weights.append(neighbors_weights)

update_ProjWeight(projection_list, projected_weights, models)



In [59]:
print(projection_list[0])

[0, tensor([[ 2.9217,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.7406,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000, -1.3727,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  1.0376,  0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  0.0000,  0.9716,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0785,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  1.8234,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  1.0488]]), 0, tensor([[ 2.4993,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.5665,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.3929,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000, -0.5409,  0.0000,  0.0000,  0.0000,  0.0000],
       

In [60]:
def testing(model, dataset, bs, criterion): 
    test_loss = 0
    correct = 0
    test_loader = DataLoader(MyDataset(dataset["features"], dataset["label"]), batch_size=bs)
    l = len(test_loader)
    model.eval()
    for data, labels in test_loader:
        data, labels = data, labels
        output = model(data)
        loss = criterion(output, labels)
        test_loss += loss.item()*data.size(0)
        #_, pred = torch.max(output, 1)
        #correct += pred.eq(labels.data.view_as(pred)).sum().item()
    
    test_loss /= len(test_loader.dataset)
    
    return test_loss

In [61]:
def rel_error(model):
    return (torch.norm(parameters_to_vector(model.parameters()) - datapoints[model.user_id]['exact_weights']) / torch.norm(datapoints[model.user_id]['exact_weights'])).detach()

In [62]:
torch.norm(datapoints[model.user_id]['exact_weights'])

tensor(8.6603, dtype=torch.float64)

In [63]:
model = MLP_Net(user_id=0)

from torch.nn.utils import parameters_to_vector, vector_to_parameters

with torch.no_grad():    
    params = parameters_to_vector(model.parameters())

    print(params)

params *= 2.

vector_to_parameters(parameters=model.parameters(), vec=params)

parameters_to_vector(model.parameters())





tensor([-0.0952, -0.3215, -0.2745, -0.0331,  0.1014,  0.0049,  0.3437, -0.3522])


tensor([-0.1904, -0.6431, -0.5490, -0.0662,  0.2029,  0.0098,  0.6874, -0.7043],
       grad_fn=<CatBackward0>)

In [64]:
#global_model = CNN_Net().cuda()
models = [MLP_Net(user_id=i) for i in range(no_users)]
dummy_models = [MLP_Net(user_id=i) for i in range(no_users)]

#model.load_state_dict(global_model.state_dict())

criterion = nn.MSELoss()


train_loss = []
test_loss = []
test_accuracy = []
total_rel_error = []

for curr_round in tqdm(range(1, it+1)):
    w, local_loss = [], []

    
    for i in range(no_users):
        dummy_models[i].load_state_dict(models[i].state_dict())
        local_update = ClientUpdate(dataset=datapoints[i], batchSize=batch_size, alpha=alpha, lamda=lamda, epochs=1, projection_list=projection_list, projected_weights=projected_weights)
        weights, loss = local_update.train(dummy_models[i])
        w.append(weights)
        local_loss.append(loss)
        models[i].load_state_dict(w[i])
        
    
    
    # Update prjection matrix
    projected_weights = []
    update_ProjWeight(projection_list, projected_weights, models, first_run=False)
    
    #print(projection_list[0], projected_weights[0])
    
    for i in range(no_users):
        weights = parameters_to_vector(models[i].parameters())
        for j in G.neighbors(i):
            temp_mat = torch.outer(projected_weights[i][j] - projected_weights[j][i], weights).clone()
            projection_list[i][j] = torch.add(projection_list[i][j], -1 * eta * lamda * temp_mat)
                                         
                                              
    
        
        
        
    
    




          
            

    local_test_acc = []
    local_test_loss = []
    user_rel_error = 0
    for k in range(no_users):
      
        g_loss = testing(models[i], datapoints[i], 50, criterion)
        local_test_loss.append(g_loss)
        user_rel_error += rel_error(models[i])
    
    
        

    g_loss = sum(local_test_loss) / len(local_test_loss)
    total_rel_error.append(user_rel_error / no_users)
    
    

    test_loss.append(g_loss)
    #test_accuracy.append(g_accuracy)
    print("Training_loss %2.5f,   Relative Error %2.5f"% (test_loss[-1], total_rel_error[-1]))

  1%|          | 2/200 [00:00<00:21,  9.16it/s]

Training_loss 57.55137,   Relative Error 0.95083
Training_loss 56.42604,   Relative Error 0.94203


  2%|▏         | 4/200 [00:00<00:22,  8.86it/s]

Training_loss 53.99686,   Relative Error 0.92218
Training_loss 52.94161,   Relative Error 0.91279


  3%|▎         | 6/200 [00:00<00:21,  8.83it/s]

Training_loss 51.54682,   Relative Error 0.90185
Training_loss 49.29687,   Relative Error 0.88260


  4%|▍         | 8/200 [00:00<00:22,  8.39it/s]

Training_loss 48.00951,   Relative Error 0.87076
Training_loss 46.68472,   Relative Error 0.85941


  5%|▌         | 10/200 [00:01<00:22,  8.32it/s]

Training_loss 43.88747,   Relative Error 0.83429
Training_loss 42.22898,   Relative Error 0.81770


  6%|▌         | 12/200 [00:01<00:22,  8.28it/s]

Training_loss 40.93532,   Relative Error 0.80510
Training_loss 39.96024,   Relative Error 0.79585


  7%|▋         | 14/200 [00:01<00:22,  8.35it/s]

Training_loss 38.79027,   Relative Error 0.78397
Training_loss 37.86939,   Relative Error 0.77522


  8%|▊         | 16/200 [00:01<00:21,  8.42it/s]

Training_loss 36.05740,   Relative Error 0.75703
Training_loss 34.93983,   Relative Error 0.74618


  9%|▉         | 18/200 [00:02<00:21,  8.31it/s]

Training_loss 33.42794,   Relative Error 0.73073
Training_loss 32.12336,   Relative Error 0.71699


 10%|█         | 20/200 [00:02<00:21,  8.40it/s]

Training_loss 31.00620,   Relative Error 0.70465
Training_loss 29.51133,   Relative Error 0.68857


 11%|█         | 22/200 [00:02<00:21,  8.16it/s]

Training_loss 28.69645,   Relative Error 0.67900
Training_loss 27.85218,   Relative Error 0.66883


 12%|█▏        | 24/200 [00:02<00:22,  7.89it/s]

Training_loss 27.32808,   Relative Error 0.66298
Training_loss 25.90296,   Relative Error 0.64638


 13%|█▎        | 26/200 [00:03<00:21,  8.05it/s]

Training_loss 25.04710,   Relative Error 0.63537
Training_loss 24.37683,   Relative Error 0.62714


 14%|█▍        | 28/200 [00:03<00:21,  7.91it/s]

Training_loss 23.52145,   Relative Error 0.61624
Training_loss 23.25799,   Relative Error 0.61249


 15%|█▌        | 30/200 [00:03<00:23,  7.15it/s]

Training_loss 22.11505,   Relative Error 0.59856
Training_loss 21.26139,   Relative Error 0.58751


 16%|█▌        | 31/200 [00:03<00:25,  6.56it/s]

Training_loss 20.48802,   Relative Error 0.57720


 16%|█▌        | 32/200 [00:04<00:28,  5.83it/s]

Training_loss 19.81833,   Relative Error 0.56777


 16%|█▋        | 33/200 [00:04<00:32,  5.14it/s]

Training_loss 19.23632,   Relative Error 0.55908


 17%|█▋        | 34/200 [00:04<00:36,  4.57it/s]

Training_loss 18.47148,   Relative Error 0.54875


 18%|█▊        | 35/200 [00:04<00:39,  4.14it/s]

Training_loss 17.94512,   Relative Error 0.54071


 18%|█▊        | 36/200 [00:05<00:45,  3.59it/s]

Training_loss 17.59059,   Relative Error 0.53492


 18%|█▊        | 37/200 [00:05<00:47,  3.40it/s]

Training_loss 16.99384,   Relative Error 0.52624


 20%|█▉        | 39/200 [00:06<00:40,  4.01it/s]

Training_loss 16.35308,   Relative Error 0.51639
Training_loss 15.82757,   Relative Error 0.50754


 20%|██        | 41/200 [00:06<00:33,  4.75it/s]

Training_loss 15.54946,   Relative Error 0.50295
Training_loss 15.22170,   Relative Error 0.49778


 22%|██▏       | 43/200 [00:06<00:32,  4.86it/s]

Training_loss 14.75654,   Relative Error 0.49037
Training_loss 14.34031,   Relative Error 0.48369


 22%|██▎       | 45/200 [00:07<00:29,  5.27it/s]

Training_loss 13.77390,   Relative Error 0.47432
Training_loss 13.23176,   Relative Error 0.46548


 24%|██▎       | 47/200 [00:07<00:24,  6.12it/s]

Training_loss 12.94117,   Relative Error 0.46015
Training_loss 12.45887,   Relative Error 0.45203


 24%|██▍       | 49/200 [00:07<00:25,  5.96it/s]

Training_loss 12.06679,   Relative Error 0.44496
Training_loss 11.73654,   Relative Error 0.43914


 25%|██▌       | 50/200 [00:07<00:26,  5.59it/s]

Training_loss 11.33924,   Relative Error 0.43199


 26%|██▌       | 51/200 [00:08<00:28,  5.31it/s]

Training_loss 11.05507,   Relative Error 0.42603


 26%|██▌       | 52/200 [00:08<00:29,  5.06it/s]

Training_loss 10.80338,   Relative Error 0.42118


 26%|██▋       | 53/200 [00:08<00:30,  4.80it/s]

Training_loss 10.45766,   Relative Error 0.41458


 27%|██▋       | 54/200 [00:08<00:31,  4.58it/s]

Training_loss 10.16816,   Relative Error 0.40889


 28%|██▊       | 55/200 [00:09<00:34,  4.23it/s]

Training_loss 9.85639,   Relative Error 0.40281


 28%|██▊       | 57/200 [00:09<00:31,  4.50it/s]

Training_loss 9.41173,   Relative Error 0.39387
Training_loss 9.14333,   Relative Error 0.38858


 30%|██▉       | 59/200 [00:09<00:25,  5.48it/s]

Training_loss 8.81589,   Relative Error 0.38184
Training_loss 8.59345,   Relative Error 0.37681


 30%|███       | 61/200 [00:10<00:21,  6.62it/s]

Training_loss 8.31480,   Relative Error 0.37076
Training_loss 7.92500,   Relative Error 0.36248


 32%|███▏      | 63/200 [00:10<00:19,  7.06it/s]

Training_loss 7.69268,   Relative Error 0.35738
Training_loss 7.34698,   Relative Error 0.34936


 32%|███▎      | 65/200 [00:10<00:19,  6.95it/s]

Training_loss 7.07583,   Relative Error 0.34286
Training_loss 6.81865,   Relative Error 0.33682


 34%|███▎      | 67/200 [00:10<00:19,  6.96it/s]

Training_loss 6.68092,   Relative Error 0.33349
Training_loss 6.48059,   Relative Error 0.32861


 34%|███▍      | 69/200 [00:11<00:18,  7.26it/s]

Training_loss 6.30775,   Relative Error 0.32415
Training_loss 6.14800,   Relative Error 0.32034


 36%|███▌      | 71/200 [00:11<00:16,  7.82it/s]

Training_loss 5.98823,   Relative Error 0.31633
Training_loss 5.66655,   Relative Error 0.30802


 36%|███▋      | 73/200 [00:11<00:15,  7.95it/s]

Training_loss 5.43725,   Relative Error 0.30195
Training_loss 5.21467,   Relative Error 0.29596


 38%|███▊      | 75/200 [00:11<00:15,  8.30it/s]

Training_loss 5.06314,   Relative Error 0.29173
Training_loss 4.89488,   Relative Error 0.28700


 38%|███▊      | 77/200 [00:12<00:14,  8.50it/s]

Training_loss 4.72920,   Relative Error 0.28241
Training_loss 4.56849,   Relative Error 0.27776


 40%|███▉      | 79/200 [00:12<00:14,  8.64it/s]

Training_loss 4.36153,   Relative Error 0.27148
Training_loss 4.20338,   Relative Error 0.26678


 40%|████      | 81/200 [00:12<00:14,  8.06it/s]

Training_loss 3.99527,   Relative Error 0.26035
Training_loss 3.78666,   Relative Error 0.25359


 42%|████▏     | 83/200 [00:12<00:14,  8.06it/s]

Training_loss 3.65124,   Relative Error 0.24921
Training_loss 3.54974,   Relative Error 0.24578


 42%|████▎     | 85/200 [00:13<00:14,  8.16it/s]

Training_loss 3.42324,   Relative Error 0.24138
Training_loss 3.28114,   Relative Error 0.23651


 44%|████▎     | 87/200 [00:13<00:13,  8.54it/s]

Training_loss 3.17014,   Relative Error 0.23232
Training_loss 3.09527,   Relative Error 0.22961


 44%|████▍     | 89/200 [00:13<00:14,  7.55it/s]

Training_loss 3.01794,   Relative Error 0.22671
Training_loss 2.96129,   Relative Error 0.22437


 46%|████▌     | 91/200 [00:13<00:15,  6.93it/s]

Training_loss 2.87347,   Relative Error 0.22119
Training_loss 2.78711,   Relative Error 0.21791


 46%|████▋     | 93/200 [00:14<00:16,  6.31it/s]

Training_loss 2.70106,   Relative Error 0.21463
Training_loss 2.66782,   Relative Error 0.21332


 48%|████▊     | 95/200 [00:14<00:18,  5.70it/s]

Training_loss 2.60408,   Relative Error 0.21086
Training_loss 2.51983,   Relative Error 0.20757


 48%|████▊     | 96/200 [00:14<00:19,  5.34it/s]

Training_loss 2.45910,   Relative Error 0.20498


 48%|████▊     | 97/200 [00:15<00:20,  5.14it/s]

Training_loss 2.38566,   Relative Error 0.20202


 50%|████▉     | 99/200 [00:15<00:20,  5.04it/s]

Training_loss 2.29144,   Relative Error 0.19820
Training_loss 2.23844,   Relative Error 0.19587


 50%|█████     | 101/200 [00:15<00:17,  5.62it/s]

Training_loss 2.15914,   Relative Error 0.19230
Training_loss 2.06926,   Relative Error 0.18831


 52%|█████▏    | 103/200 [00:16<00:16,  5.83it/s]

Training_loss 2.03196,   Relative Error 0.18676
Training_loss 1.96474,   Relative Error 0.18371


 52%|█████▎    | 105/200 [00:16<00:14,  6.63it/s]

Training_loss 1.90073,   Relative Error 0.18067
Training_loss 1.83411,   Relative Error 0.17761


 54%|█████▎    | 107/200 [00:16<00:13,  7.07it/s]

Training_loss 1.75884,   Relative Error 0.17384
Training_loss 1.68413,   Relative Error 0.17017


 55%|█████▍    | 109/200 [00:17<00:14,  6.31it/s]

Training_loss 1.63410,   Relative Error 0.16756
Training_loss 1.58003,   Relative Error 0.16471


 56%|█████▌    | 111/200 [00:17<00:13,  6.39it/s]

Training_loss 1.53832,   Relative Error 0.16251
Training_loss 1.50270,   Relative Error 0.16061


 56%|█████▋    | 113/200 [00:17<00:13,  6.43it/s]

Training_loss 1.45345,   Relative Error 0.15789
Training_loss 1.39923,   Relative Error 0.15505


 57%|█████▊    | 115/200 [00:17<00:12,  7.03it/s]

Training_loss 1.33222,   Relative Error 0.15130
Training_loss 1.28343,   Relative Error 0.14860


 58%|█████▊    | 117/200 [00:18<00:12,  6.75it/s]

Training_loss 1.23090,   Relative Error 0.14550
Training_loss 1.19650,   Relative Error 0.14344


 60%|█████▉    | 119/200 [00:18<00:11,  7.01it/s]

Training_loss 1.15732,   Relative Error 0.14115
Training_loss 1.12275,   Relative Error 0.13901


 60%|██████    | 121/200 [00:18<00:10,  7.36it/s]

Training_loss 1.07439,   Relative Error 0.13605
Training_loss 1.03383,   Relative Error 0.13352


 62%|██████▏   | 123/200 [00:19<00:10,  7.41it/s]

Training_loss 1.01520,   Relative Error 0.13236
Training_loss 0.98124,   Relative Error 0.13019


 62%|██████▎   | 125/200 [00:19<00:10,  7.28it/s]

Training_loss 0.95288,   Relative Error 0.12834
Training_loss 0.90978,   Relative Error 0.12550


 64%|██████▎   | 127/200 [00:19<00:09,  7.51it/s]

Training_loss 0.88541,   Relative Error 0.12382
Training_loss 0.84413,   Relative Error 0.12093


 64%|██████▍   | 129/200 [00:19<00:09,  7.65it/s]

Training_loss 0.80639,   Relative Error 0.11822
Training_loss 0.77116,   Relative Error 0.11567


 66%|██████▌   | 131/200 [00:20<00:08,  7.88it/s]

Training_loss 0.75275,   Relative Error 0.11434
Training_loss 0.72722,   Relative Error 0.11239


 66%|██████▋   | 133/200 [00:20<00:08,  7.56it/s]

Training_loss 0.70837,   Relative Error 0.11097
Training_loss 0.68616,   Relative Error 0.10931


 68%|██████▊   | 135/200 [00:20<00:08,  7.60it/s]

Training_loss 0.65773,   Relative Error 0.10702
Training_loss 0.64433,   Relative Error 0.10590


 68%|██████▊   | 137/200 [00:20<00:07,  7.97it/s]

Training_loss 0.62784,   Relative Error 0.10454
Training_loss 0.59977,   Relative Error 0.10225


 70%|██████▉   | 139/200 [00:21<00:07,  7.97it/s]

Training_loss 0.58283,   Relative Error 0.10085
Training_loss 0.56538,   Relative Error 0.09929


 70%|███████   | 141/200 [00:21<00:07,  7.62it/s]

Training_loss 0.54539,   Relative Error 0.09752
Training_loss 0.53408,   Relative Error 0.09652


 72%|███████▏  | 143/200 [00:21<00:07,  7.79it/s]

Training_loss 0.52248,   Relative Error 0.09552
Training_loss 0.50383,   Relative Error 0.09373


 72%|███████▎  | 145/200 [00:21<00:07,  7.31it/s]

Training_loss 0.48705,   Relative Error 0.09220
Training_loss 0.47204,   Relative Error 0.09074


 74%|███████▎  | 147/200 [00:22<00:07,  7.43it/s]

Training_loss 0.45111,   Relative Error 0.08871
Training_loss 0.43368,   Relative Error 0.08702


 74%|███████▍  | 149/200 [00:22<00:07,  7.26it/s]

Training_loss 0.41685,   Relative Error 0.08534
Training_loss 0.40705,   Relative Error 0.08435


 76%|███████▌  | 151/200 [00:22<00:06,  7.10it/s]

Training_loss 0.39313,   Relative Error 0.08289
Training_loss 0.37950,   Relative Error 0.08143


 76%|███████▋  | 153/200 [00:22<00:06,  7.03it/s]

Training_loss 0.36522,   Relative Error 0.07985
Training_loss 0.35359,   Relative Error 0.07858


 78%|███████▊  | 155/200 [00:23<00:06,  7.25it/s]

Training_loss 0.34035,   Relative Error 0.07715
Training_loss 0.33095,   Relative Error 0.07611


 78%|███████▊  | 157/200 [00:23<00:05,  7.33it/s]

Training_loss 0.32321,   Relative Error 0.07526
Training_loss 0.31199,   Relative Error 0.07395


 80%|███████▉  | 159/200 [00:23<00:05,  7.26it/s]

Training_loss 0.29807,   Relative Error 0.07226
Training_loss 0.29123,   Relative Error 0.07141


 80%|████████  | 161/200 [00:24<00:05,  7.39it/s]

Training_loss 0.28064,   Relative Error 0.07011
Training_loss 0.26906,   Relative Error 0.06869


 82%|████████▏ | 163/200 [00:24<00:05,  7.18it/s]

Training_loss 0.26177,   Relative Error 0.06778
Training_loss 0.25544,   Relative Error 0.06697


 82%|████████▎ | 165/200 [00:24<00:04,  7.18it/s]

Training_loss 0.24583,   Relative Error 0.06572
Training_loss 0.23919,   Relative Error 0.06485


 84%|████████▎ | 167/200 [00:24<00:04,  7.05it/s]

Training_loss 0.22800,   Relative Error 0.06333
Training_loss 0.22043,   Relative Error 0.06231


 84%|████████▍ | 169/200 [00:25<00:04,  7.08it/s]

Training_loss 0.20682,   Relative Error 0.06034
Training_loss 0.20213,   Relative Error 0.05966


 86%|████████▌ | 171/200 [00:25<00:04,  7.15it/s]

Training_loss 0.19700,   Relative Error 0.05889
Training_loss 0.19291,   Relative Error 0.05827


 86%|████████▋ | 173/200 [00:25<00:03,  7.01it/s]

Training_loss 0.18334,   Relative Error 0.05680
Training_loss 0.17593,   Relative Error 0.05563


 88%|████████▊ | 175/200 [00:26<00:03,  7.08it/s]

Training_loss 0.17254,   Relative Error 0.05510
Training_loss 0.16562,   Relative Error 0.05397


 88%|████████▊ | 177/200 [00:26<00:03,  7.15it/s]

Training_loss 0.15994,   Relative Error 0.05304
Training_loss 0.15647,   Relative Error 0.05244


 90%|████████▉ | 179/200 [00:26<00:02,  7.10it/s]

Training_loss 0.15213,   Relative Error 0.05173
Training_loss 0.14591,   Relative Error 0.05068


 90%|█████████ | 181/200 [00:26<00:02,  7.06it/s]

Training_loss 0.13981,   Relative Error 0.04961
Training_loss 0.13692,   Relative Error 0.04908


 92%|█████████▏| 183/200 [00:27<00:02,  7.04it/s]

Training_loss 0.13390,   Relative Error 0.04853
Training_loss 0.12991,   Relative Error 0.04780


 92%|█████████▎| 185/200 [00:27<00:02,  7.26it/s]

Training_loss 0.12677,   Relative Error 0.04724
Training_loss 0.12318,   Relative Error 0.04655


 94%|█████████▎| 187/200 [00:27<00:01,  7.16it/s]

Training_loss 0.11999,   Relative Error 0.04595
Training_loss 0.11665,   Relative Error 0.04530


 94%|█████████▍| 189/200 [00:28<00:01,  7.29it/s]

Training_loss 0.11143,   Relative Error 0.04425
Training_loss 0.10570,   Relative Error 0.04309


 96%|█████████▌| 191/200 [00:28<00:01,  6.89it/s]

Training_loss 0.10295,   Relative Error 0.04252
Training_loss 0.09946,   Relative Error 0.04180


 96%|█████████▋| 193/200 [00:28<00:01,  6.95it/s]

Training_loss 0.09436,   Relative Error 0.04071
Training_loss 0.09075,   Relative Error 0.03993


 98%|█████████▊| 195/200 [00:28<00:00,  6.87it/s]

Training_loss 0.08853,   Relative Error 0.03945
Training_loss 0.08621,   Relative Error 0.03893


 98%|█████████▊| 197/200 [00:29<00:00,  7.07it/s]

Training_loss 0.08284,   Relative Error 0.03816
Training_loss 0.08022,   Relative Error 0.03759


100%|█████████▉| 199/200 [00:29<00:00,  6.98it/s]

Training_loss 0.07695,   Relative Error 0.03682
Training_loss 0.07425,   Relative Error 0.03620


100%|██████████| 200/200 [00:29<00:00,  6.74it/s]

Training_loss 0.07176,   Relative Error 0.03557





In [65]:
#plot.plot(test_loss)
print(parameters_to_vector(models[19].parameters()), W[1])

tensor([ 1.9324e+00,  1.9037e+00,  3.5253e-03,  4.8826e+00,  1.8235e+00,
         2.8820e+00, -4.8826e+00, -1.9083e+00], grad_fn=<CatBackward0>) [ 2.  2.  0.  5.  2.  3. -5. -2.]


In [70]:
for i in G.neighbors(3):
    print(i)

0
1
2
6
7
15
17


In [80]:
torch.norm(projection_list[3][2])

tensor(3.3668, grad_fn=<CopyBackwards>)

In [78]:
torch.norm(projection_list[3][17])

tensor(3.0908, grad_fn=<CopyBackwards>)

In [25]:
projected_weights[0]

[0,
 tensor([ 5.2790e+00, -2.5029e-01,  9.1683e-03,  7.0979e+00,  3.6382e-01,
         -1.9726e+00,  1.0874e+01,  2.9370e-01]),
 tensor([ 2.9348e+00, -2.2912e-01, -7.7133e-03,  1.8830e+00, -5.2439e+00,
          1.0127e-01,  8.6279e+00, -6.4575e-01]),
 tensor([ 1.9854, -1.1935,  0.0217, -4.1667,  0.0568, -1.3243,  7.5671,  0.6386]),
 0,
 tensor([ 4.3646e+00,  1.7972e-01,  6.4962e-03,  7.7013e+00,  6.3390e-01,
         -2.0343e+00,  8.8277e+00,  4.0564e+00]),
 0,
 tensor([ 3.2554e+00,  6.8024e-01,  3.4622e-03,  3.0119e+00, -4.5969e+00,
         -5.8841e+00, -3.4314e+00,  6.2406e-01]),
 tensor([ 5.9126,  3.1070,  0.0465,  9.8781, -1.5906, -0.4128,  3.7419,  2.8448]),
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0]

In [26]:
test_loss = np.array(test_loss)
total_rel_error = np.array(total_rel_error)

In [27]:
print(test_loss)

[67.33135414 64.57100201 61.27441311 60.00060463 58.65205193 56.40827847
 53.31119537 51.32714367 49.33261395 47.54439068 46.269557   43.40295792
 42.05415058 40.75786495 39.83782101 38.06907368 36.55533314 34.83418465
 33.51958656 32.18213272 31.13984156 30.17588854 29.04995108 27.89095736
 27.00554848 25.85731554 24.95472002 24.09577179 23.28443193 22.41710711
 21.63378    20.82412243 20.16054583 19.53862286 18.38286591 17.94714689
 17.05394793 16.16547894 15.6152935  14.67442036 14.24921751 13.59483433
 13.15179515 12.6843164  12.3842926  12.03766274 11.53843713 11.1109519
 10.86659288 10.61069608 10.31712437  9.93964243  9.71444321  9.48615575
  9.14067149  8.76560855  8.37353909  8.08962882  7.95410132  7.76755345
  7.35702753  7.075629    6.91544116  6.78558016  6.64930212  6.43153965
  6.20015156  6.00742829  5.8313092   5.66502392  5.51256323  5.36497355
  5.09306157  4.95911086  4.79514909  4.61898971  4.48799241  4.38134193
  4.33169758  4.13522625  4.03800017  3.90009385  3.

In [28]:
np.save( 'training_loss_sheave_fml' + str(eta).replace('.', '_')+ '_pout' + str(pout).replace('.', '_'), test_loss)
np.save('relative_error_sheave_fml' + str(eta).replace('.', '_')+ '_pout' + str(pout).replace('.', '_'), total_rel_error)

In [29]:
'training_loss_sheave_fml' + str(eta).replace('.', '_')+ '_pout' + str(pout).replace('.', '_')

'training_loss_sheave_fml0_01_pout0_01'

In [30]:
'relative_error_sheave_fml' + str(eta).replace('.', '_')+ '_pout' + str(pout).replace('.', '_')

'relative_error_sheave_fml0_01_pout0_01'