In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Dataset
import copy
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import networkx as nx
from torch.nn.utils import parameters_to_vector, vector_to_parameters

In [2]:
# Graph implementation
def generate_graph(cluster_sizes=[100,100], pin=0.5, pout=0.01, seed=0):
    """Generate a random connected graph"""
    probs = np.array([[pin, pout],[pout, pin]])
    while True:
        g = nx.stochastic_block_model(cluster_sizes, probs)
        if nx.algorithms.components.is_connected(g):
            return g


cluster_sizes = [10, 10]
pin = 0.5
pout = 0.01
seed = 0
alpha = 1e-3
lamda = 1e-3
eta = 1e-3
no_users = sum(cluster_sizes)
batch_size = 20
epochs = 1
it = 1000
G = generate_graph(cluster_sizes, pin, pout, seed)

#nx.draw(G, with_labels=True, node_size=100, alpha=1, linewidths=10)
#plt.show()

In [3]:
# Metropolis weights 
number_nodes = G.number_of_nodes()
weights = np.zeros([number_nodes, number_nodes])
for edge in G.edges():
  i, j = edge[0], edge[1]
  weights[i - 1][j - 1] = 1 / (1 + np.max([G.degree(i), G.degree(j)]))
  weights[j - 1][i - 1] = weights[i - 1][j - 1]

print(weights)

weights = weights + np.diag(1 - np.sum(weights, axis=0))

metropolis_weights = weights
print(metropolis_weights)


[[0.         0.125      0.         0.14285714 0.         0.
  0.         0.         0.16666667 0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.        ]
 [0.125      0.         0.         0.125      0.125      0.125
  0.125      0.125      0.125      0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.        ]
 [0.         0.         0.         0.14285714 0.         0.16666667
  0.16666667 0.14285714 0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.16666667]
 [0.14285714 0.125      0.14285714 0.         0.         0.
  0.14285714 0.14285714 0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.14285714]
 [0.         0.125      0.         0.         0.         0.
  0.16666667 0.         0.16666667 0.         0.         0.
  0.         0.         0.16666667 0.         0. 

In [4]:
def load_dataset():
    transforms_mnist = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,),(0.3081,))])
    mnist_data_train = datasets.MNIST('./data/mnist', train=True, download=True, transform=transforms_mnist)
    mnist_data_test = datasets.MNIST('./data/mnist', train=False, download=True, transform=transforms_mnist)

    return mnist_data_train, mnist_data_test

In [5]:
def degrees(A):
    """Return the degrees of each node of a graph from its adjacency matrix"""
    return np.sum(A, axis=0).reshape(A.shape[0], 1)

def node_degree(n, G):
    cnt = 0
    for i in G.neighbors(n):
        cnt += 1
    return cnt

def get_neighbors(n, G):
    neighbors_list = []
    for i in G.neighbors(n):
        neighbors_list.append(int(i))
    return neighbors_list

In [6]:
datapoints = {}
count = 0
W1 = np.array([2, 2])
W2 = np.array([-2, 2])
W = [W1, W2]
m = 200
n = 2
noise_sd = 0.001
for i, cluster_size in enumerate(cluster_sizes):
    for j in range(cluster_size):
        features = np.random.normal(loc=0.0, scale=1.0, size=(m, n))
        label = np.dot(features, W[i]) + np.random.normal(0,noise_sd)
        datapoints[count] = {
                'features': features,
                'degree': node_degree(count, G),
                'label': label,
                'neighbors': get_neighbors(count, G)
            }
        count += 1

In [7]:
class MyDataset(Dataset):
    def __init__(self, data, targets, transform=None):
        self.data = torch.FloatTensor(data)
        self.targets = torch.FloatTensor(targets).unsqueeze(-1)
        
    def __getitem__(self, index):
        x = self.data[index]
        y = self.targets[index]

        return x, y
    
    def __len__(self):
        return len(self.data)


In [8]:
class MLP_Net(nn.Module):
    def __init__(self, user_id):
        super(MLP_Net, self).__init__()
        self.fc1 = nn.Linear(2, 1, bias=False)
        #self.fc2 = nn.Linear(4, 1, bias=False)
        #self.fc3 = nn.Linear(200, 10)
        self.user_id = user_id

    def forward(self, x):
        x = torch.flatten(x, 1)
        #x = F.relu(self.fc1(x))
        output = self.fc1(x)
        #output = self.fc3(x)
        return output

In [9]:
from typing import Iterable, Optional

def grads_to_vector(parameters: Iterable[torch.Tensor]) -> torch.Tensor:
    r"""Convert parameters to one vector

    Args:
        parameters (Iterable[Tensor]): an iterator of Tensors that are the
            parameters of a model.

    Returns:
        The parameters represented by a single vector
    """
    # Flag for the device where the parameter is located
    param_device = None

    vec = []
    for param in parameters:
        # Ensure the parameters are located in the same device
        param_device = param.grad

        vec.append(param_device.view(-1))
    return torch.cat(vec)

In [10]:
model = MLP_Net(user_id=0)

lr = 0.01

dataloader = DataLoader(MyDataset(datapoints[19]["features"], datapoints[19]["label"]), batch_size=50, shuffle=False)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
for i in range(100):
    for (x, y) in dataloader:
        criterion = nn.MSELoss()
        optimizer.zero_grad()
        yhat = model(x)
        print(y.size())
        print(yhat.size())
        loss = criterion(yhat, y)
        
        loss.backward()
        print(i, loss, grads_to_vector(model.parameters()), parameters_to_vector(model.parameters()))
        #optimizer.step()
        new_model = parameters_to_vector(model.parameters()) - lr * grads_to_vector(model.parameters())
        vector_to_parameters(parameters=model.parameters(), vec=new_model)
        #if i % 50 ==0:
            #lr *= 0.9
            

#parameters_to_vector(model.parameters())

torch.Size([50, 1])
torch.Size([50, 1])
0 tensor(6.5186, grad_fn=<MseLossBackward0>) tensor([ 5.1784, -2.6586]) tensor([-0.2075,  0.5876], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
0 tensor(3.9923, grad_fn=<MseLossBackward0>) tensor([ 3.2408, -1.6909]) tensor([-0.2593,  0.6142], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
0 tensor(4.6513, grad_fn=<MseLossBackward0>) tensor([ 3.5628, -2.3494]) tensor([-0.2917,  0.6311], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
0 tensor(3.1075, grad_fn=<MseLossBackward0>) tensor([ 2.2779, -1.7873]) tensor([-0.3273,  0.6546], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
1 tensor(5.5905, grad_fn=<MseLossBackward0>) tensor([ 4.7732, -2.4902]) tensor([-0.3501,  0.6725], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
1 tensor(3.4250, grad_fn=<MseLossBackward0>) tensor([ 2.9686, -1.6073]) tensor([-0.3978,  0.6974], grad_fn=<CatBackward0>)
torch.Size([50, 1])
to

16 tensor(0.3663, grad_fn=<MseLossBackward0>) tensor([ 0.7944, -0.7125]) tensor([-1.5342,  1.4909], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
16 tensor(0.4332, grad_fn=<MseLossBackward0>) tensor([ 0.9737, -0.8379]) tensor([-1.5422,  1.4981], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
16 tensor(0.3014, grad_fn=<MseLossBackward0>) tensor([ 0.5761, -0.6981]) tensor([-1.5519,  1.5064], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
17 tensor(0.5024, grad_fn=<MseLossBackward0>) tensor([ 1.3120, -0.8724]) tensor([-1.5577,  1.5134], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
17 tensor(0.3169, grad_fn=<MseLossBackward0>) tensor([ 0.7274, -0.6730]) tensor([-1.5708,  1.5221], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
17 tensor(0.3746, grad_fn=<MseLossBackward0>) tensor([ 0.8986, -0.7854]) tensor([-1.5781,  1.5289], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
17 tensor(0.2614

31 tensor(0.0370, grad_fn=<MseLossBackward0>) tensor([ 0.1596, -0.2785]) tensor([-1.8672,  1.8103], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
32 tensor(0.0566, grad_fn=<MseLossBackward0>) tensor([ 0.3997, -0.3255]) tensor([-1.8688,  1.8131], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
32 tensor(0.0379, grad_fn=<MseLossBackward0>) tensor([ 0.1935, -0.2785]) tensor([-1.8728,  1.8163], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
32 tensor(0.0439, grad_fn=<MseLossBackward0>) tensor([ 0.2726, -0.2964]) tensor([-1.8747,  1.8191], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
32 tensor(0.0323, grad_fn=<MseLossBackward0>) tensor([ 0.1466, -0.2617]) tensor([-1.8775,  1.8221], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
33 tensor(0.0491, grad_fn=<MseLossBackward0>) tensor([ 0.3695, -0.3048]) tensor([-1.8789,  1.8247], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
33 tensor(0.0330

50 tensor(0.0045, grad_fn=<MseLossBackward0>) tensor([ 0.0989, -0.0995]) tensor([-1.9686,  1.9415], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
50 tensor(0.0032, grad_fn=<MseLossBackward0>) tensor([ 0.0391, -0.0921]) tensor([-1.9696,  1.9425], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
50 tensor(0.0036, grad_fn=<MseLossBackward0>) tensor([ 0.0672, -0.0915]) tensor([-1.9700,  1.9434], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
50 tensor(0.0028, grad_fn=<MseLossBackward0>) tensor([ 0.0317, -0.0843]) tensor([-1.9707,  1.9443], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
51 tensor(0.0039, grad_fn=<MseLossBackward0>) tensor([ 0.0917, -0.0932]) tensor([-1.9710,  1.9452], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
51 tensor(0.0028, grad_fn=<MseLossBackward0>) tensor([ 0.0358, -0.0865]) tensor([-1.9719,  1.9461], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
51 tensor(0.0031

70 tensor(0.0002, grad_fn=<MseLossBackward0>) tensor([ 0.0148, -0.0246]) tensor([-1.9936,  1.9846], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
70 tensor(0.0002, grad_fn=<MseLossBackward0>) tensor([ 0.0059, -0.0235]) tensor([-1.9938,  1.9848], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
71 tensor(0.0002, grad_fn=<MseLossBackward0>) tensor([ 0.0202, -0.0249]) tensor([-1.9938,  1.9851], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
71 tensor(0.0002, grad_fn=<MseLossBackward0>) tensor([ 0.0059, -0.0244]) tensor([-1.9940,  1.9853], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
71 tensor(0.0002, grad_fn=<MseLossBackward0>) tensor([ 0.0138, -0.0231]) tensor([-1.9941,  1.9856], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
71 tensor(0.0002, grad_fn=<MseLossBackward0>) tensor([ 0.0054, -0.0220]) tensor([-1.9942,  1.9858], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
72 tensor(0.0002

90 tensor(1.6269e-05, grad_fn=<MseLossBackward0>) tensor([ 0.0034, -0.0066]) tensor([-1.9986,  1.9958], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
90 tensor(1.4295e-05, grad_fn=<MseLossBackward0>) tensor([ 0.0011, -0.0065]) tensor([-1.9986,  1.9959], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
91 tensor(1.6478e-05, grad_fn=<MseLossBackward0>) tensor([ 0.0046, -0.0066]) tensor([-1.9986,  1.9960], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
91 tensor(1.3763e-05, grad_fn=<MseLossBackward0>) tensor([ 0.0010, -0.0067]) tensor([-1.9987,  1.9960], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
91 tensor(1.4245e-05, grad_fn=<MseLossBackward0>) tensor([ 0.0032, -0.0062]) tensor([-1.9987,  1.9961], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
91 tensor(1.2557e-05, grad_fn=<MseLossBackward0>) tensor([ 0.0010, -0.0061]) tensor([-1.9987,  1.9962], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([

In [11]:
parameters_to_vector(model.parameters())

tensor([-1.9993,  1.9978], grad_fn=<CatBackward0>)

In [12]:
class CNN_Net(nn.Module):
    def __init__(self):
        super(CNN_Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5)
        self.pool = nn.MaxPool2d(2,2)
        self.dropout = nn.Dropout(p=0.2)
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.dropout(x)
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        output = self.fc2(x)
        return output

In [13]:
class ClientUpdate(object):
    def __init__(self, dataset, batchSize, alpha, lamda, epochs, projection_list, projected_weights):
        self.train_loader = DataLoader(MyDataset(dataset["features"], dataset["label"]), batch_size=batchSize, shuffle=True)
        #self.learning_rate = learning_rate
        self.epochs = epochs
        self.batchSize = batchSize

    def train(self, model):
        criterion = nn.MSELoss()
        optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, momentum=0.5)

        e_loss = []
        for epoch in range(1, self.epochs+1):
            train_loss = 0
            model.train()
            for i, (data, labels) in zip(range(1), self.train_loader):
                data, labels = data, labels
                optimizer.zero_grad() 
                output = model(data)  
                loss = criterion(output, labels)
                #loss += mu/2 * torch.norm(client_param.data - server_param.data)**2
                loss.backward()
                grads = grads_to_vector(model.parameters())
                #optimizer.step()
                train_loss += loss.item()*data.size(0)
                weights = parameters_to_vector(model.parameters())
                mat_vec_sum = torch.zeros_like(weights)
                for j in G.neighbors(model.user_id):
                    mat_vec_sum = torch.add(mat_vec_sum, torch.matmul(torch.transpose(projection_list[model.user_id][j], 0, 1), 
                                                         projected_weights[j][model.user_id] - projected_weights[model.user_id][j]))
                
                model_update = parameters_to_vector(model.parameters()) - alpha * (grads + lamda * mat_vec_sum)
                
            vector_to_parameters(parameters=model.parameters(), vec=model_update)
                

            train_loss = train_loss/self.batchSize#len(self.train_loader.dataset) 
            e_loss.append(train_loss)

        total_loss = e_loss#sum(e_loss)/len(e_loss)

        return model.state_dict(), total_loss

In [14]:
# Preparing projection matrices
models = [MLP_Net(user_id=i) for i in range(no_users)]
#temp = MLP_Net()
projection_list = []
projected_weights = []

def update_ProjWeight(projection_list, projected_weights, first_run=True):
    #projected_weights = []
    for i in range(no_users):
        neighbors_mat = []
        neighbors_weights = []
        for j in range(no_users):
            if j in G.neighbors(i):
                with torch.no_grad():
                    if first_run == True:
                        row, column = parameters_to_vector(models[j].parameters()).size()[0], parameters_to_vector(models[i].parameters()).size()[0]
                        mat = torch.zeros((row, column))
                        mat.fill_diagonal_(1.0)
                        neighbors_mat.append(mat)
                        neighbors_weights.append(torch.matmul(mat, parameters_to_vector(models[j].parameters())))
                    else:
                        neighbors_weights.append(torch.matmul(projection_list[j][i], parameters_to_vector(models[j].parameters())))
            else:
                neighbors_mat.append(0)
                neighbors_weights.append(0)
        if first_run == True:
            projection_list.append(neighbors_mat)
        projected_weights.append(neighbors_weights)

update_ProjWeight(projection_list, projected_weights)



In [15]:
def testing(model, dataset, bs, criterion): 
    test_loss = 0
    correct = 0
    test_loader = DataLoader(MyDataset(dataset["features"], dataset["label"]), batch_size=bs)
    l = len(test_loader)
    model.eval()
    for data, labels in test_loader:
        data, labels = data, labels
        output = model(data)
        loss = criterion(output, labels)
        test_loss += loss.item()*data.size(0)
        #_, pred = torch.max(output, 1)
        #correct += pred.eq(labels.data.view_as(pred)).sum().item()
    
    test_loss /= len(test_loader.dataset)
    
    return test_loss

In [16]:
model = MLP_Net(user_id=0)

from torch.nn.utils import parameters_to_vector, vector_to_parameters

with torch.no_grad():    
    params = parameters_to_vector(model.parameters())

    print(params)

params *= 2.

vector_to_parameters(parameters=model.parameters(), vec=params)

parameters_to_vector(model.parameters())





tensor([-0.2887, -0.6727])


tensor([-0.5773, -1.3454], grad_fn=<CatBackward0>)

In [21]:
#global_model = CNN_Net().cuda()
models = [MLP_Net(user_id=i) for i in range(no_users)]
dummy_models = [MLP_Net(user_id=i) for i in range(no_users)]

#model.load_state_dict(global_model.state_dict())

criterion = nn.MSELoss()


train_loss = []
test_loss = []
test_accuracy = []


for curr_round in tqdm(range(1, it+1)):
    w, local_loss = [], []

    
    for i in range(no_users):
        dummy_models[i].load_state_dict(models[i].state_dict())
        local_update = ClientUpdate(dataset=datapoints[i], batchSize=batch_size, alpha=alpha, lamda=lamda, epochs=1, projection_list=projection_list, projected_weights=projected_weights)
        weights, loss = local_update.train(dummy_models[i])
        w.append(weights)
        local_loss.append(loss)
        models[i].load_state_dict(w[i])
        
    
    
    # Update prjection matrix
    
    #print(projection_list[0], projected_weights[0])
    
    for i in range(no_users):
        weights = parameters_to_vector(models[i].parameters())
        for j in G.neighbors(i):
            weights = parameters_to_vector(model.parameters())
            mat_vec_sum = torch.zeros_like(weights)
            for k in G.neighbors(i):
                 mat_vec_sum = torch.add(mat_vec_sum, torch.matmul(projected_weights[k][i] - projected_weights[i][k],
                                                                  torch.transpose(weights, -1, 0)))
            projection_list[i][j] = torch.add(projection_list[i][j], -1 * eta * lamda * mat_vec_sum)
                                         
    projected_weights = []                                          
    update_ProjWeight(projection_list, projected_weights, first_run=False)
        
        
        
    
    




          
            

    local_test_acc = []
    local_test_loss = []
    for k in range(no_users):
      
      g_loss = testing(models[i], datapoints[i], 50, criterion)
      local_test_loss.append(g_loss)
    
        

    g_loss = sum(local_test_loss) / len(local_test_loss)
    #g_accuracy = sum(local_test_acc) / len(local_test_acc)
    
    

    test_loss.append(g_loss)
    #test_accuracy.append(g_accuracy)
    print("Training_loss %2.5f"% (test_loss[-1]))

  0%|          | 1/1000 [00:00<02:22,  7.03it/s]

tensor([-2.2338, -0.8660])
20
20
20
tensor([-6.1805, -4.1533])
20
20
20
tensor([-3.2886, -3.6561])
20
20
20
20
20
20
20
tensor([-1.3579, -3.4181])
20
20
20
20
20
tensor([-2.7099, -3.1932])
20
20
20
20
20
20
tensor([-4.2619, -3.2560])
20
20
20
20
tensor([-4.5286, -3.0897])
20
20
20
tensor([-4.6780, -3.1842])
20
20
20
20
20
tensor([-2.7924, -3.8710])
20
20
20
20
20
20
tensor([-2.3332, -5.0100])
20
20
20
20
20
tensor([ 3.4721, -1.8105])
20
20
tensor([ 2.8966, -1.9572])
20
20
20
tensor([ 1.8248, -3.7180])
20
20
20
tensor([ 3.1632, -3.7378])
20
20
20
20
20
tensor([ 5.5343, -3.1481])
20
20
20
20
20
20
tensor([ 5.4844, -2.7808])
20
20
20
20
20
tensor([ 2.1039, -4.9914])
20
20
20
20
20
tensor([ 4.6806, -6.2223])
20
20
20
20
20
20
20
tensor([ 3.0290, -3.3924])
20
20
20
tensor([ 4.3061, -4.1878])
20
20
20
20
20
20
Training_loss 6.94563
tensor([-6.5012, -4.4284])
20
20
20
tensor([-7.0771, -6.1763])
20
20
20
tensor([-2.8308, -5.9688])
20
20
20
20
20
20
20
tensor([-3.5357, -2.6442])
20
20
20
20
20


  0%|          | 2/1000 [00:00<02:08,  7.75it/s]

20
20
20
20
20
20
tensor([ 2.1233, -3.7602])
20
20
20
20
20
tensor([ 2.9899, -6.9087])
20
20
20
20
20
tensor([ 4.9457, -4.2530])
20
20
20
20
20
20
20
tensor([ 1.3516, -3.5958])
20
20
20
tensor([ 5.4944, -4.0427])
20
20
20
20
20
20
Training_loss 6.91302
tensor([-5.5194, -4.4555])
20
20
20
tensor([-3.3728, -2.7379])
20
20
20
tensor([-3.9075, -3.5607])
20
20
20
20
20
20
20
tensor([-4.7651, -4.2660])
20
20
20
20
20
tensor([-3.4703, -3.4130])
20
20
20
20
20
20
tensor([-7.2843, -7.1693])
20
20
20
20
tensor([-4.2238, -0.7341])
20
20
20
tensor([-8.4432, -6.0563])
20
20
20
20
20
tensor([-4.1708, -3.9271])
20
20
20
20
20
20
tensor([-4.0451, -4.3426])
20
20
20
20
20
tensor([ 1.6819, -5.5290])
20
20
tensor([ 2.3830, -0.7676])
20
20
20
tensor([ 2.6956, -4.9047])
20
20
20
tensor([ 4.8946, -5.0604])
20
20
20
20
20
tensor([ 2.6445, -5.8218])
20
20
20
20
20
20
tensor([ 3.9839, -1.3062])
20
20
20
20
20
tensor([ 5.4544, -2.9988])
20
20
20
20
20
tensor([ 3.1593, -4.5863])
20
20
20
20
20
20
20
tensor([ 3.8

  0%|          | 5/1000 [00:00<01:50,  9.01it/s]

20
20
20
20
20
20
20
tensor([-1.6672, -2.9117])
20
20
20
20
20
tensor([-4.8800, -4.5438])
20
20
20
20
20
20
tensor([-5.2837, -6.4477])
20
20
20
20
tensor([-3.5306, -2.0336])
20
20
20
tensor([-6.0245, -5.3104])
20
20
20
20
20
tensor([-4.5466, -4.2447])
20
20
20
20
20
20
tensor([-2.6550, -4.9795])
20
20
20
20
20
tensor([ 3.8347, -3.7540])
20
20
tensor([ 2.5182, -4.0252])
20
20
20
tensor([ 9.5174, -4.8311])
20
20
20
tensor([ 2.1720, -3.9579])
20
20
20
20
20
tensor([ 3.4906, -1.6863])
20
20
20
20
20
20
tensor([ 6.4675, -3.7505])
20
20
20
20
20
tensor([ 2.3530, -3.1550])
20
20
20
20
20
tensor([ 4.7075, -4.6632])
20
20
20
20
20
20
20
tensor([ 4.9494, -3.0318])
20
20
20
tensor([ 2.2189, -3.5016])
20
20
20
20
20
20
Training_loss 6.87107
tensor([-5.8685, -2.7536])
20
20
20
tensor([-2.7264, -1.6525])
20
20
20
tensor([-4.0208, -6.4495])
20
20
20
20
20
20
20
tensor([-3.8592, -4.1252])
20
20
20
20
20
tensor([-5.8997, -2.2730])
20
20
20
20
20
20
tensor([-1.1268, -4.6255])
20
20
20
20
tensor([-2.3786

  1%|          | 6/1000 [00:00<01:54,  8.71it/s]

tensor([-5.4347, -3.4213])
20
20
20
tensor([-5.1853, -4.7521])
20
20
20
tensor([-3.9038, -4.8927])
20
20
20
20
20
20
20
tensor([-3.2877, -5.8090])
20
20
20
20
20
tensor([-5.6779, -3.7123])
20
20
20
20
20
20
tensor([-3.4253, -3.4678])
20
20
20
20
tensor([-1.6166, -1.6354])
20
20
20
tensor([-1.9217, -6.5823])
20
20
20
20
20
tensor([-2.0129, -4.2419])
20
20
20
20
20
20
tensor([-2.4734, -4.0394])
20
20
20
20
20
tensor([ 3.8639, -3.9239])
20
20
tensor([ 2.8438, -5.5920])
20
20
20
tensor([ 6.5717, -5.0191])
20
20
20
tensor([ 5.5871, -7.7867])
20
20
20
20
20
tensor([ 3.4280, -3.2575])
20
20
20
20
20
20
tensor([ 5.3223, -2.8295])
20
20
20
20
20
tensor([ 3.5155, -7.2524])
20
20
20
20
20
tensor([ 4.0284, -4.4801])
20
20
20
20
20
20
20
tensor([ 5.8837, -6.5175])
20
20
20
tensor([ 2.8576, -2.4154])
20
20
20
20
20
20
Training_loss 6.82605
tensor([-4.3201, -2.7157])
20
20
20
tensor([-2.2365, -2.1657])
20
20
20
tensor([-5.1713, -4.3474])
20
20
20
20
20
20
20
tensor([-4.1573, -3.2334])
20
20
20
20
20


  1%|          | 7/1000 [00:00<01:54,  8.71it/s]

Training_loss 6.80160
tensor([-4.1684, -1.7497])
20
20
20
tensor([-4.0982, -3.0594])
20
20
20
tensor([-3.5158, -1.0229])
20
20
20
20
20
20
20
tensor([-3.6412, -6.0176])
20
20
20
20
20
tensor([-3.1463, -4.2734])
20
20
20
20
20
20
tensor([-7.3534, -4.1478])
20
20
20
20
tensor([-4.5948, -4.3798])
20
20
20
tensor([-4.2780, -7.4283])
20
20
20
20
20
tensor([-5.2960, -3.5405])
20
20
20
20
20
20
tensor([-2.0264, -2.5963])
20
20
20
20
20
tensor([ 4.8807, -3.4221])
20
20
tensor([ 3.5741, -3.1133])
20
20
20
tensor([ 5.8887, -6.6945])
20
20
20
tensor([ 2.1065, -3.9516])
20
20
20
20
20
tensor([ 4.1634, -3.6192])
20
20
20
20
20
20
tensor([ 5.0709, -2.6364])
20
20
20
20
20
tensor([ 1.2694, -3.1735])
20
20
20
20
20
tensor([ 5.4557, -2.2337])
20
20
20
20
20
20
20
tensor([ 2.4537, -5.3568])
20
20
20
tensor([ 3.9811, -4.6234])
20
20
20
20
20
20
Training_loss 6.77197
tensor([-5.2015, -4.2678])
20
20
20
tensor([-6.2895, -6.2386])
20
20
20
tensor([-4.3951, -4.0211])
20
20
20
20
20
20
20
tensor([-2.8448, -2.

  1%|          | 10/1000 [00:01<01:48,  9.14it/s]

tensor([-7.7439, -5.1579])
20
20
20
tensor([-3.1368, -4.0343])
20
20
20
tensor([-2.9750, -2.2225])
20
20
20
20
20
20
20
tensor([-1.7583, -1.7794])
20
20
20
20
20
tensor([-2.6130, -0.8557])
20
20
20
20
20
20
tensor([-3.3121, -5.6656])
20
20
20
20
tensor([-4.6386, -4.9053])
20
20
20
tensor([-2.5624, -4.2728])
20
20
20
20
20
tensor([-3.9995, -6.1496])
20
20
20
20
20
20
tensor([-3.0334, -2.6280])
20
20
20
20
20
tensor([ 2.4434, -4.9669])
20
20
tensor([ 1.2332, -3.3237])
20
20
20
tensor([ 2.1612, -3.4069])
20
20
20
tensor([ 1.5898, -3.8926])
20
20
20
20
20
tensor([ 1.6477, -5.5969])
20
20
20
20
20
20
tensor([ 2.6928, -2.8506])
20
20
20
20
20
tensor([ 3.1454, -4.1870])
20
20
20
20
20
tensor([ 7.9750, -2.6867])
20
20
20
20
20
20
20
tensor([ 2.6035, -3.2628])
20
20
20
tensor([ 2.0967, -4.9777])
20
20
20
20
20
20
Training_loss 6.72047
tensor([-5.7687, -4.6281])
20
20
20
tensor([-6.8110, -5.4275])
20
20
20
tensor([-2.5924, -4.2110])
20
20
20
20
20
20
20
tensor([-2.4545, -3.1933])
20
20
20
20
20


  1%|          | 12/1000 [00:01<01:51,  8.88it/s]

20
20
20
20
20
Training_loss 6.70429
tensor([-4.6133, -3.3311])
20
20
20
tensor([-5.6440, -4.5340])
20
20
20
tensor([-4.2277, -4.0534])
20
20
20
20
20
20
20
tensor([-3.8237, -4.5644])
20
20
20
20
20
tensor([-2.1468, -2.8935])
20
20
20
20
20
20
tensor([-1.3964, -7.3767])
20
20
20
20
tensor([-1.5590, -2.4900])
20
20
20
tensor([-7.7620, -5.1532])
20
20
20
20
20
tensor([-1.1434, -5.7496])
20
20
20
20
20
20
tensor([-0.4158, -5.9654])
20
20
20
20
20
tensor([ 1.6815, -3.8910])
20
20
tensor([ 1.9127, -3.6298])
20
20
20
tensor([ 6.5016, -5.2751])
20
20
20
tensor([ 4.1867, -3.7438])
20
20
20
20
20
tensor([ 5.5636, -4.3297])
20
20
20
20
20
20
tensor([ 3.2986, -3.8793])
20
20
20
20
20
tensor([ 4.0420, -4.4998])
20
20
20
20
20
tensor([ 4.4654, -3.7421])
20
20
20
20
20
20
20
tensor([ 4.3408, -6.0748])
20
20
20
tensor([ 5.1526, -4.2661])
20
20
20
20
20
20
Training_loss 6.67253
tensor([-1.7538, -1.9775])
20
20
20
tensor([-5.1070, -7.3079])
20
20
20
tensor([-3.9584, -2.5739])
20
20
20
20
20
20
20
tenso

  1%|▏         | 13/1000 [00:01<01:50,  8.91it/s]

20
20
20
20
tensor([-2.5870, -4.5915])
20
20
20
tensor([-2.7003, -3.8326])
20
20
20
20
20
tensor([-2.3906, -8.3463])
20
20
20
20
20
20
tensor([-1.9582, -2.8643])
20
20
20
20
20
tensor([ 1.7387, -2.2752])
20
20
tensor([ 1.8605, -2.1032])
20
20
20
tensor([ 6.1365, -5.7190])
20
20
20
tensor([ 4.7454, -5.8023])
20
20
20
20
20
tensor([ 3.9932, -2.2042])
20
20
20
20
20
20
tensor([ 5.1608, -4.0683])
20
20
20
20
20
tensor([ 2.7358, -5.4253])
20
20
20
20
20
tensor([ 4.6695, -3.7545])
20
20
20
20
20
20
20
tensor([ 2.5556, -4.3854])
20
20
20
tensor([ 3.2749, -4.6826])
20
20
20
20
20
20
Training_loss 6.64517
tensor([-6.7072, -4.4761])
20
20
20
tensor([-3.1679, -1.6547])
20
20
20
tensor([-1.8861, -3.9370])
20
20
20
20
20
20
20
tensor([-3.6167, -3.7204])
20
20
20
20
20
tensor([-4.2667, -4.2108])
20
20
20
20
20
20
tensor([-6.1067, -5.1747])
20
20
20
20
tensor([-4.7482, -3.3790])
20
20
20
tensor([-2.1290, -4.3641])
20
20
20
20
20
tensor([-1.9176, -5.5243])
20
20
20
20
20
20
tensor([-1.4860, -4.9098])


  2%|▏         | 15/1000 [00:01<01:50,  8.87it/s]

Training_loss 6.63116
tensor([-6.4604, -3.4920])
20
20
20
tensor([-4.1724, -6.6426])
20
20
20
tensor([-3.3371, -1.9870])
20
20
20
20
20
20
20
tensor([-4.9695, -5.5685])
20
20
20
20
20
tensor([-4.0995, -2.0267])
20
20
20
20
20
20
tensor([-7.1165, -7.0745])
20
20
20
20
tensor([-3.8320, -2.3621])
20
20
20
tensor([-2.6318, -4.7394])
20
20
20
20
20
tensor([-4.4824, -4.4894])
20
20
20
20
20
20
tensor([-3.0916, -4.8971])
20
20
20
20
20
tensor([ 4.8942, -1.9149])
20
20
tensor([ 2.6173, -4.8954])
20
20
20
tensor([ 8.3191, -6.4899])
20
20
20
tensor([ 7.4403, -3.5603])
20
20
20
20
20
tensor([ 2.8453, -5.4540])
20
20
20
20
20
20
tensor([ 6.7881, -2.6026])
20
20
20
20
20
tensor([ 3.2030, -5.2641])
20
20
20
20
20
tensor([ 5.0466, -3.0337])
20
20
20
20
20
20
20
tensor([ 3.0252, -4.2123])
20
20
20
tensor([ 2.2001, -4.4220])
20
20
20
20
20
20
Training_loss 6.60819
tensor([-8.6316, -6.7280])
20
20
20
tensor([-5.0501, -3.5172])
20
20
20
tensor([-3.0599, -1.4392])
20
20
20
20
20
20
20
tensor([-2.0580, -4.

  2%|▏         | 17/1000 [00:01<01:47,  9.11it/s]

Training_loss 6.59150
tensor([-7.0290, -8.3757])
20
20
20
tensor([-3.3777, -3.9535])
20
20
20
tensor([-4.5411, -4.5290])
20
20
20
20
20
20
20
tensor([-5.1384, -2.8804])
20
20
20
20
20
tensor([-6.5330, -3.4889])
20
20
20
20
20
20
tensor([-4.5673, -5.0988])
20
20
20
20
tensor([-2.8901, -2.0939])
20
20
20
tensor([-7.1709, -3.5580])
20
20
20
20
20
tensor([-0.8145, -2.5614])
20
20
20
20
20
20
tensor([-0.6318, -4.0346])
20
20
20
20
20
tensor([ 3.4974, -2.6928])
20
20
tensor([ 3.4052, -2.5399])
20
20
20
tensor([ 3.1848, -2.9518])
20
20
20
tensor([ 2.8266, -3.3512])
20
20
20
20
20
tensor([ 2.7305, -1.5892])
20
20
20
20
20
20
tensor([ 1.7978, -2.7364])
20
20
20
20
20
tensor([ 2.6526, -2.2871])
20
20
20
20
20
tensor([ 4.0752, -5.2661])
20
20
20
20
20
20
20
tensor([ 2.5662, -2.1389])
20
20
20
tensor([ 1.4811, -2.7959])
20
20
20
20
20
20
Training_loss 6.57674
tensor([-3.9832, -3.6907])
20
20
20
tensor([-2.9867, -4.2780])
20
20
20
tensor([-4.3430, -3.5640])
20
20
20
20
20
20
20
tensor([-1.6114, -3.

  2%|▏         | 19/1000 [00:02<01:48,  9.01it/s]

Training_loss 6.55883
tensor([-5.0383, -5.1244])
20
20
20
tensor([-4.8918, -2.9864])
20
20
20
tensor([-2.3205, -2.8756])
20
20
20
20
20
20
20
tensor([-4.9595, -4.2663])
20
20
20
20
20
tensor([-4.5390, -6.0043])
20
20
20
20
20
20
tensor([-4.0107, -2.7279])
20
20
20
20
tensor([-5.3526, -3.3322])
20
20
20
tensor([-5.0436, -4.2401])
20
20
20
20
20
tensor([-3.8196, -4.0352])
20
20
20
20
20
20
tensor([-3.8751, -3.3035])
20
20
20
20
20
tensor([ 4.6009, -1.8908])
20
20
tensor([ 1.7961, -2.3623])
20
20
20
tensor([ 4.0664, -4.2888])
20
20
20
tensor([ 3.7414, -3.8417])
20
20
20
20
20
tensor([ 6.0303, -4.6557])
20
20
20
20
20
20
tensor([ 3.6251, -2.2034])
20
20
20
20
20
tensor([ 1.9501, -4.6137])
20
20
20
20
20
tensor([ 3.5125, -3.7602])
20
20
20
20
20
20
20
tensor([ 4.2347, -3.2972])
20
20
20
tensor([ 5.0925, -3.6795])
20
20
20
20
20
20
Training_loss 6.52974
tensor([-4.9944, -2.2887])
20
20
20
tensor([-4.4810, -4.3623])
20
20
20
tensor([-3.1814, -4.4816])
20
20
20
20
20
20
20
tensor([-5.7440, -3.

  2%|▏         | 21/1000 [00:02<01:47,  9.13it/s]

Training_loss 6.50178
tensor([-5.6629, -4.4966])
20
20
20
tensor([-5.3285, -2.5819])
20
20
20
tensor([-6.4421, -6.3548])
20
20
20
20
20
20
20
tensor([-2.0211, -1.4906])
20
20
20
20
20
tensor([-4.8261, -3.5153])
20
20
20
20
20
20
tensor([-4.8698, -3.5219])
20
20
20
20
tensor([-2.7837, -2.0449])
20
20
20
tensor([-3.7850, -2.4069])
20
20
20
20
20
tensor([-3.0790, -4.5650])
20
20
20
20
20
20
tensor([-3.1235, -5.0854])
20
20
20
20
20
tensor([ 6.5113, -4.9243])
20
20
tensor([ 3.2377, -2.1063])
20
20
20
tensor([ 4.2152, -6.4989])
20
20
20
tensor([ 2.3923, -2.4597])
20
20
20
20
20
tensor([ 5.1295, -3.7870])
20
20
20
20
20
20
tensor([ 4.6355, -3.2105])
20
20
20
20
20
tensor([ 2.4519, -5.0751])
20
20
20
20
20
tensor([ 2.8328, -4.4158])
20
20
20
20
20
20
20
tensor([ 3.1425, -3.5048])
20
20
20
tensor([ 5.1905, -3.6170])
20
20
20
20
20
20
Training_loss 6.47275
tensor([-3.9378, -5.4832])
20
20
20
tensor([-3.7909, -4.6372])
20
20
20
tensor([-6.0159, -5.9776])
20
20
20
20
20
20
20
tensor([-2.9490, -3.

  2%|▏         | 23/1000 [00:02<01:59,  8.19it/s]

Training_loss 6.45769
tensor([-3.5786, -4.0423])
20
20
20
tensor([-6.1843, -3.9655])
20
20
20
tensor([-1.7893, -5.3086])
20
20
20
20
20
20
20
tensor([-2.1778, -0.9160])
20
20
20
20
20
tensor([-4.2580, -3.1967])
20
20
20
20
20
20
tensor([-5.3073, -6.0818])
20
20
20
20
tensor([-3.1605, -2.0464])
20
20
20
tensor([-4.5947, -4.2475])
20
20
20
20
20
tensor([-4.6654, -4.4704])
20
20
20
20
20
20
tensor([-2.1554, -2.5959])
20
20
20
20
20
tensor([ 5.7241, -7.1355])
20
20
tensor([ 1.7664, -3.3673])
20
20
20
tensor([ 4.3637, -6.3413])
20
20
20
tensor([ 3.3262, -5.3060])
20
20
20
20
20
tensor([ 4.1168, -3.1548])
20
20
20
20
20
20
tensor([ 4.4375, -5.0256])
20
20
20
20
20
tensor([ 3.7605, -3.7329])
20
20
20
20
20
tensor([ 7.0903, -5.0997])
20
20
20
20
20
20
20
tensor([ 4.1590, -2.3576])
20
20
20
tensor([ 2.0331, -5.2189])
20
20
20
20
20
20
Training_loss 6.43265
tensor([-5.0731, -4.0679])
20
20
20
tensor([-3.1075, -2.8929])
20
20
20
tensor([-2.2236, -2.9316])
20
20
20
20
20
20
20
tensor([-3.1093, -5.

  2%|▎         | 25/1000 [00:03<02:26,  6.63it/s]

Training_loss 6.42095
tensor([-2.1303, -2.9910])
20
20
20
tensor([-3.6315, -4.2630])
20
20
20
tensor([-6.5202, -6.1546])
20
20
20
20
20
20
20
tensor([-2.3928, -4.6865])
20
20
20
20
20
tensor([-3.0718, -2.7636])
20
20
20
20
20
20
tensor([-3.5199, -2.4085])
20
20
20
20
tensor([-2.4852, -3.8150])
20
20
20
tensor([-5.4139, -6.6064])
20
20
20
20
20
tensor([-0.8665, -4.6063])
20
20
20
20
20
20
tensor([-1.3817, -6.7161])
20
20
20
20
20
tensor([ 3.6297, -4.5433])
20
20
tensor([ 3.5575, -2.0537])
20
20
20
tensor([ 2.6978, -3.5583])
20
20
20
tensor([ 3.4722, -6.6305])
20
20
20
20
20
tensor([ 3.8033, -3.8164])
20
20
20
20
20
20
tensor([ 3.6566, -2.6096])
20
20
20
20
20
tensor([ 2.9556, -4.2980])
20
20
20
20
20
tensor([ 6.4484, -4.3094])
20
20
20
20
20
20
20
tensor([ 6.3442, -6.0147])
20
20
20
tensor([ 4.6180, -1.7496])
20
20
20
20
20
20
Training_loss 6.40058
tensor([-3.8196, -2.9963])
20
20
20
tensor([-1.4913, -5.6432])
20
20
20
tensor([-1.2869, -3.4066])
20
20
20
20
20
20
20
tensor([-1.7237, -3.

  3%|▎         | 27/1000 [00:03<02:09,  7.53it/s]

20
20
tensor([ 2.7043, -3.0426])
20
20
20
tensor([ 2.6761, -3.7857])
20
20
20
tensor([ 6.7409, -4.6036])
20
20
20
20
20
tensor([ 3.3312, -5.0066])
20
20
20
20
20
20
tensor([ 3.4480, -3.8385])
20
20
20
20
20
tensor([ 3.4898, -2.4958])
20
20
20
20
20
tensor([ 4.2426, -5.9180])
20
20
20
20
20
20
20
tensor([ 5.0044, -4.7205])
20
20
20
tensor([ 1.4826, -4.7110])
20
20
20
20
20
20
Training_loss 6.37914
tensor([-5.9892, -3.1911])
20
20
20
tensor([-5.8502, -2.8410])
20
20
20
tensor([-2.3778, -4.1504])
20
20
20
20
20
20
20
tensor([-2.1089, -3.5051])
20
20
20
20
20
tensor([-3.7345, -2.7226])
20
20
20
20
20
20
tensor([-4.6536, -9.2694])
20
20
20
20
tensor([-1.4282, -1.7710])
20
20
20
tensor([-4.4316, -3.0876])
20
20
20
20
20
tensor([-2.7023, -5.5090])
20
20
20
20
20
20
tensor([-2.4987, -4.8915])
20
20
20
20
20
tensor([ 3.8128, -3.5974])
20
20
tensor([ 1.9799, -2.0946])
20
20
20
tensor([ 3.2242, -3.5051])
20
20
20
tensor([ 1.7828, -2.5170])
20
20
20
20
20
tensor([ 5.9185, -6.0449])
20
20
20
20
20


  3%|▎         | 28/1000 [00:03<02:01,  7.97it/s]

tensor([-7.1257, -2.8708])
20
20
20
tensor([-3.5914, -3.2702])
20
20
20
tensor([-3.0101, -3.8687])
20
20
20
20
20
20
20
tensor([-4.8696, -3.3237])
20
20
20
20
20
tensor([-5.8435, -2.3566])
20
20
20
20
20
20
tensor([-5.8075, -4.9843])
20
20
20
20
tensor([-3.1469, -2.9270])
20
20
20
tensor([-8.0643, -5.9325])
20
20
20
20
20
tensor([-2.8570, -4.7595])
20
20
20
20
20
20
tensor([-2.2731, -4.1704])
20
20
20
20
20
tensor([ 2.4948, -4.1817])
20
20
tensor([ 3.2721, -2.3859])
20
20
20
tensor([ 4.5440, -8.0059])
20
20
20
tensor([ 3.2640, -4.2009])
20
20
20
20
20
tensor([ 6.3809, -5.8520])
20
20
20
20
20
20
tensor([ 5.6312, -2.1288])
20
20
20
20
20
tensor([ 5.0107, -5.2544])
20
20
20
20
20
tensor([ 5.7734, -1.9853])
20
20
20
20
20
20
20
tensor([ 5.9212, -4.2402])
20
20
20
tensor([ 1.8553, -4.6431])
20
20
20
20
20
20
Training_loss 6.33255
tensor([-2.8693, -2.3360])
20
20
20
tensor([-3.9888, -4.2475])
20
20
20
tensor([-4.7559, -6.6915])
20
20
20
20
20
20
20
tensor([-5.6607, -5.2200])
20
20
20
20
20


  3%|▎         | 30/1000 [00:03<02:07,  7.60it/s]

Training_loss 6.31175
tensor([-6.3663, -3.4767])
20
20
20
tensor([-3.8183, -3.4741])
20
20
20
tensor([-6.1292, -4.7298])
20
20
20
20
20
20
20
tensor([-2.9902, -3.6664])
20
20
20
20
20
tensor([-2.9687, -2.0178])
20
20
20
20
20
20
tensor([-2.3911, -6.2917])
20
20
20
20
tensor([-3.9723, -3.9959])
20
20
20
tensor([-4.8291, -7.6208])
20
20
20
20
20
tensor([-2.7282, -1.6284])
20
20
20
20
20
20
tensor([-1.0871, -3.8271])
20
20
20
20
20
tensor([ 3.0392, -6.2987])
20
20
tensor([ 2.1694, -1.9221])
20
20
20
tensor([ 3.1251, -4.3537])
20
20
20
tensor([ 2.2664, -4.1940])
20
20
20
20
20
tensor([ 2.9994, -2.5682])
20
20
20
20
20
20
tensor([ 3.2703, -1.7869])
20
20
20
20
20
tensor([ 2.8142, -5.2133])
20
20
20
20
20
tensor([ 4.8070, -4.7279])
20
20
20
20
20
20
20
tensor([ 4.5710, -4.8603])
20
20
20
tensor([ 2.7676, -2.8449])
20
20
20
20
20
20
Training_loss 6.29324
tensor([-3.9407, -1.2175])
20
20
20
tensor([-2.4697, -5.3552])
20
20
20
tensor([-3.9731, -3.8023])
20
20
20
20
20
20
20
tensor([-3.1149, -0.

  3%|▎         | 32/1000 [00:03<01:54,  8.42it/s]

20
20
tensor([ 2.8787, -1.6968])
20
20
20
tensor([ 5.9919, -4.8881])
20
20
20
tensor([ 4.1321, -3.3417])
20
20
20
20
20
tensor([ 4.2597, -3.4378])
20
20
20
20
20
20
tensor([3.8434, 0.1604])
20
20
20
20
20
tensor([ 1.2927, -2.8400])
20
20
20
20
20
tensor([ 4.5768, -2.7912])
20
20
20
20
20
20
20
tensor([ 4.7694, -2.7503])
20
20
20
tensor([ 4.4270, -3.7138])
20
20
20
20
20
20
Training_loss 6.26666
tensor([-2.8904, -4.1627])
20
20
20
tensor([-5.6803, -1.7415])
20
20
20
tensor([-2.0993, -5.6784])
20
20
20
20
20
20
20
tensor([-3.3314, -4.8048])
20
20
20
20
20
tensor([-2.6014, -3.0037])
20
20
20
20
20
20
tensor([-5.8683, -6.7663])
20
20
20
20
tensor([-3.1453, -3.5269])
20
20
20
tensor([-5.5261, -3.8116])
20
20
20
20
20
tensor([-4.4307, -2.5112])
20
20
20
20
20
20
tensor([-3.8622, -3.5714])
20
20
20
20
20
tensor([ 5.5959, -4.5081])
20
20
tensor([ 2.7470, -3.4256])
20
20
20
tensor([ 5.0203, -3.4168])
20
20
20
tensor([ 2.2282, -3.3060])
20
20
20
20
20
tensor([ 4.9043, -4.2727])
20
20
20
20
20
20

  3%|▎         | 33/1000 [00:03<01:52,  8.59it/s]

tensor([-4.2063, -3.3234])
20
20
20
tensor([-4.3904, -1.9283])
20
20
20
tensor([-2.6446, -4.9008])
20
20
20
20
20
20
20
tensor([-2.5574, -5.0818])
20
20
20
20
20
tensor([-3.8208, -4.0322])
20
20
20
20
20
20
tensor([-4.3310, -4.7263])
20
20
20
20
tensor([-3.1939, -4.0558])
20
20
20
tensor([-4.6161, -4.6643])
20
20
20
20
20
tensor([-2.4119, -5.3888])
20
20
20
20
20
20
tensor([-1.7971, -2.6011])
20
20
20
20
20
tensor([ 3.2360, -4.0082])
20
20
tensor([ 1.7936, -1.8618])
20
20
20
tensor([ 5.3499, -6.5823])
20
20
20
tensor([ 3.8706, -3.9416])
20
20
20
20
20
tensor([ 2.6880, -3.1454])
20
20
20
20
20
20
tensor([ 2.9841, -1.8722])
20
20
20
20
20
tensor([ 2.5411, -3.3045])
20
20
20
20
20
tensor([ 5.7749, -4.2989])
20
20
20
20
20
20
20
tensor([ 2.1845, -4.0083])
20
20
20
tensor([ 3.1497, -4.4608])
20
20
20
20
20
20
Training_loss 6.20944
tensor([-4.9037, -5.0577])
20
20
20
tensor([-4.3596, -4.0540])
20
20
20
tensor([-4.0058, -2.6165])
20
20
20
20
20
20
20
tensor([-1.7803, -2.5503])
20
20
20
20
20


  4%|▎         | 35/1000 [00:04<01:57,  8.19it/s]

Training_loss 6.19253
tensor([-4.2577, -6.2882])
20
20
20
tensor([-4.4046, -5.5861])
20
20
20
tensor([-4.8190, -5.8203])
20
20
20
20
20
20
20
tensor([-4.5113, -2.8728])
20
20
20
20
20
tensor([-4.1986, -2.8974])
20
20
20
20
20
20
tensor([-3.8555, -5.6340])
20
20
20
20
tensor([-2.9380, -3.4073])
20
20
20
tensor([-6.2038, -5.5927])
20
20
20
20
20
tensor([-1.4651, -3.8559])
20
20
20
20
20
20
tensor([-2.4871, -5.1073])
20
20
20
20
20
tensor([ 5.1676, -6.3699])
20
20
tensor([ 3.1253, -2.2821])
20
20
20
tensor([ 6.0611, -5.5484])
20
20
20
tensor([ 1.9747, -6.2175])
20
20
20
20
20
tensor([ 7.2904, -6.7510])
20
20
20
20
20
20
tensor([ 4.5052, -0.9179])
20
20
20
20
20
tensor([ 4.1624, -2.9414])
20
20
20
20
20
tensor([ 3.3266, -4.5903])
20
20
20
20
20
20
20
tensor([ 2.5294, -5.5332])
20
20
20
tensor([ 1.8650, -1.6899])
20
20
20
20
20
20
Training_loss 6.18098
tensor([-6.4831, -4.9624])
20
20
20
tensor([-6.6085, -4.1486])
20
20
20
tensor([-2.8359, -5.2595])
20
20
20
20
20
20
20
tensor([-2.9108, -2.

  4%|▎         | 37/1000 [00:04<01:55,  8.30it/s]

20
20
Training_loss 6.16081
tensor([-4.2733, -4.6466])
20
20
20
tensor([-5.3465, -5.5873])
20
20
20
tensor([-6.9988, -4.1150])
20
20
20
20
20
20
20
tensor([-2.7680, -1.6957])
20
20
20
20
20
tensor([-4.8778, -1.9436])
20
20
20
20
20
20
tensor([-4.8886, -6.4775])
20
20
20
20
tensor([-2.4882, -3.8404])
20
20
20
tensor([-2.7116, -6.1481])
20
20
20
20
20
tensor([-2.7296, -5.4796])
20
20
20
20
20
20
tensor([-4.4360, -2.6899])
20
20
20
20
20
tensor([ 6.5135, -5.5048])
20
20
tensor([ 3.1732, -2.5454])
20
20
20
tensor([ 7.5362, -4.0900])
20
20
20
tensor([ 4.4322, -6.3002])
20
20
20
20
20
tensor([ 4.7867, -2.9737])
20
20
20
20
20
20
tensor([ 4.6951, -1.9603])
20
20
20
20
20
tensor([ 3.7985, -3.7080])
20
20
20
20
20
tensor([ 2.8217, -3.0988])
20
20
20
20
20
20
20
tensor([ 2.6373, -2.3276])
20
20
20
tensor([ 1.2720, -2.1442])
20
20
20
20
20
20
Training_loss 6.14945
tensor([-4.2226, -3.5871])
20
20
20
tensor([-6.8700, -6.1187])


  4%|▎         | 37/1000 [00:04<01:58,  8.13it/s]

20
20
20
tensor([-1.3850, -2.9826])
20
20
20
20
20
20
20
tensor([-4.0359, -2.0372])
20
20
20
20
20
tensor([-3.5997, -1.1512])
20
20
20
20
20
20
tensor([-3.2431, -3.8086])
20
20
20
20
tensor([-3.2871, -4.0851])
20
20
20
tensor([-9.4358, -5.3391])
20
20
20
20
20
tensor([-2.6631, -4.9594])
20
20
20
20
20
20
tensor([-0.6397, -4.4262])
20
20
20
20
20
tensor([ 3.8145, -4.3606])
20
20
tensor([ 2.9305, -3.7158])
20
20
20
tensor([ 3.8648, -4.2991])
20
20
20
tensor([ 3.3566, -5.3605])
20
20
20
20
20
tensor([ 5.1981, -4.8817])
20
20
20
20
20
20
tensor([ 5.2878, -1.2341])
20
20
20
20
20
tensor([ 3.0776, -7.9117])
20
20
20
20
20
tensor([ 5.2894, -4.8398])
20
20
20
20
20
20
20
tensor([ 3.7044, -3.1182])
20
20
20
tensor([ 3.4775, -5.4193])
20
20
20
20
20
20





KeyboardInterrupt: 

In [None]:
fig, ax = plt.subplots()
x_axis = np.arange(1, len(train_loss)+1)
y_axis = np.array(train_loss)
ax.plot(x_axis, y_axis, label = "train loss")

y_axis = np.array(test_loss)
ax.plot(x_axis, y_axis, label = "test loss")

ax.set(xlabel='Number of Rounds', ylabel='Loss')
ax.legend()
ax.grid()

In [None]:
fig, ax = plt.subplots()
x_axis = np.arange(1, T+1)
y_axis = np.array(test_accuracy)
ax.plot(x_axis, y_axis)

ax.set(xlabel='Number of Rounds', ylabel='Test Accuracy')
ax.grid()