In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Dataset
import copy
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import networkx as nx
from torch.nn.utils import parameters_to_vector, vector_to_parameters

In [2]:
# Graph implementation
def generate_graph(cluster_sizes=[100,100], pin=0.5, pout=0.01, seed=0):
    """Generate a random connected graph"""
    probs = np.array([[pin, pout],[pout, pin]])
    while True:
        g = nx.stochastic_block_model(cluster_sizes, probs)
        if nx.algorithms.components.is_connected(g):
            return g


cluster_sizes = [10, 10]
pin = 0.5
pout = 0.01
seed = 0
alpha = 1e-3
lamda = 1e-3
eta = 1e-3
no_users = sum(cluster_sizes)
batch_size = 20
epochs = 1
it = 1000
G = generate_graph(cluster_sizes, pin, pout, seed)

#nx.draw(G, with_labels=True, node_size=100, alpha=1, linewidths=10)
#plt.show()

In [3]:
# Metropolis weights 
number_nodes = G.number_of_nodes()
weights = np.zeros([number_nodes, number_nodes])
for edge in G.edges():
  i, j = edge[0], edge[1]
  weights[i - 1][j - 1] = 1 / (1 + np.max([G.degree(i), G.degree(j)]))
  weights[j - 1][i - 1] = weights[i - 1][j - 1]

print(weights)

weights = weights + np.diag(1 - np.sum(weights, axis=0))

metropolis_weights = weights
print(metropolis_weights)


[[0.         0.125      0.         0.14285714 0.         0.
  0.         0.         0.16666667 0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.        ]
 [0.125      0.         0.         0.125      0.125      0.125
  0.125      0.125      0.125      0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.        ]
 [0.         0.         0.         0.14285714 0.         0.16666667
  0.16666667 0.14285714 0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.16666667]
 [0.14285714 0.125      0.14285714 0.         0.         0.
  0.14285714 0.14285714 0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.14285714]
 [0.         0.125      0.         0.         0.         0.
  0.16666667 0.         0.16666667 0.         0.         0.
  0.         0.         0.16666667 0.         0. 

In [4]:
def load_dataset():
    transforms_mnist = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,),(0.3081,))])
    mnist_data_train = datasets.MNIST('./data/mnist', train=True, download=True, transform=transforms_mnist)
    mnist_data_test = datasets.MNIST('./data/mnist', train=False, download=True, transform=transforms_mnist)

    return mnist_data_train, mnist_data_test

In [5]:
def degrees(A):
    """Return the degrees of each node of a graph from its adjacency matrix"""
    return np.sum(A, axis=0).reshape(A.shape[0], 1)

def node_degree(n, G):
    cnt = 0
    for i in G.neighbors(n):
        cnt += 1
    return cnt

def get_neighbors(n, G):
    neighbors_list = []
    for i in G.neighbors(n):
        neighbors_list.append(int(i))
    return neighbors_list

In [6]:
datapoints = {}
count = 0
W1 = np.array([2, 2])
W2 = np.array([-2, 2])
W = [W1, W2]
m = 200
n = 2
noise_sd = 0.001
for i, cluster_size in enumerate(cluster_sizes):
    for j in range(cluster_size):
        features = np.random.normal(loc=0.0, scale=1.0, size=(m, n))
        label = np.dot(features, W[i]) + np.random.normal(0,noise_sd)
        datapoints[count] = {
                'features': features,
                'degree': node_degree(count, G),
                'label': label,
                'neighbors': get_neighbors(count, G)
            }
        count += 1

In [7]:
class MyDataset(Dataset):
    def __init__(self, data, targets, transform=None):
        self.data = torch.FloatTensor(data)
        self.targets = torch.FloatTensor(targets).unsqueeze(-1)
        
    def __getitem__(self, index):
        x = self.data[index]
        y = self.targets[index]

        return x, y
    
    def __len__(self):
        return len(self.data)


In [8]:
class MLP_Net(nn.Module):
    def __init__(self, user_id):
        super(MLP_Net, self).__init__()
        self.fc1 = nn.Linear(2, 1, bias=False)
        #self.fc2 = nn.Linear(4, 1, bias=False)
        #self.fc3 = nn.Linear(200, 10)
        self.user_id = user_id

    def forward(self, x):
        x = torch.flatten(x, 1)
        #x = F.relu(self.fc1(x))
        output = self.fc1(x)
        #output = self.fc3(x)
        return output

In [9]:
from typing import Iterable, Optional

def grads_to_vector(parameters: Iterable[torch.Tensor]) -> torch.Tensor:
    r"""Convert parameters to one vector

    Args:
        parameters (Iterable[Tensor]): an iterator of Tensors that are the
            parameters of a model.

    Returns:
        The parameters represented by a single vector
    """
    # Flag for the device where the parameter is located
    param_device = None

    vec = []
    for param in parameters:
        # Ensure the parameters are located in the same device
        param_device = param.grad

        vec.append(param_device.view(-1))
    return torch.cat(vec)

In [10]:
model = MLP_Net(user_id=0)

lr = 0.01

dataloader = DataLoader(MyDataset(datapoints[19]["features"], datapoints[19]["label"]), batch_size=50, shuffle=False)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
for i in range(100):
    for (x, y) in dataloader:
        criterion = nn.MSELoss()
        optimizer.zero_grad()
        yhat = model(x)
        print(y.size())
        print(yhat.size())
        loss = criterion(yhat, y)
        
        loss.backward()
        print(i, loss, grads_to_vector(model.parameters()), parameters_to_vector(model.parameters()))
        #optimizer.step()
        new_model = parameters_to_vector(model.parameters()) - lr * grads_to_vector(model.parameters())
        vector_to_parameters(parameters=model.parameters(), vec=new_model)
        #if i % 50 ==0:
            #lr *= 0.9
            

#parameters_to_vector(model.parameters())

torch.Size([50, 1])
torch.Size([50, 1])
0 tensor(6.5186, grad_fn=<MseLossBackward0>) tensor([ 5.1784, -2.6586]) tensor([-0.2075,  0.5876], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
0 tensor(3.9923, grad_fn=<MseLossBackward0>) tensor([ 3.2408, -1.6909]) tensor([-0.2593,  0.6142], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
0 tensor(4.6513, grad_fn=<MseLossBackward0>) tensor([ 3.5628, -2.3494]) tensor([-0.2917,  0.6311], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
0 tensor(3.1075, grad_fn=<MseLossBackward0>) tensor([ 2.2779, -1.7873]) tensor([-0.3273,  0.6546], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
1 tensor(5.5905, grad_fn=<MseLossBackward0>) tensor([ 4.7732, -2.4902]) tensor([-0.3501,  0.6725], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
1 tensor(3.4250, grad_fn=<MseLossBackward0>) tensor([ 2.9686, -1.6073]) tensor([-0.3978,  0.6974], grad_fn=<CatBackward0>)
torch.Size([50, 1])
to

16 tensor(0.3663, grad_fn=<MseLossBackward0>) tensor([ 0.7944, -0.7125]) tensor([-1.5342,  1.4909], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
16 tensor(0.4332, grad_fn=<MseLossBackward0>) tensor([ 0.9737, -0.8379]) tensor([-1.5422,  1.4981], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
16 tensor(0.3014, grad_fn=<MseLossBackward0>) tensor([ 0.5761, -0.6981]) tensor([-1.5519,  1.5064], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
17 tensor(0.5024, grad_fn=<MseLossBackward0>) tensor([ 1.3120, -0.8724]) tensor([-1.5577,  1.5134], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
17 tensor(0.3169, grad_fn=<MseLossBackward0>) tensor([ 0.7274, -0.6730]) tensor([-1.5708,  1.5221], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
17 tensor(0.3746, grad_fn=<MseLossBackward0>) tensor([ 0.8986, -0.7854]) tensor([-1.5781,  1.5289], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
17 tensor(0.2614

31 tensor(0.0370, grad_fn=<MseLossBackward0>) tensor([ 0.1596, -0.2785]) tensor([-1.8672,  1.8103], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
32 tensor(0.0566, grad_fn=<MseLossBackward0>) tensor([ 0.3997, -0.3255]) tensor([-1.8688,  1.8131], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
32 tensor(0.0379, grad_fn=<MseLossBackward0>) tensor([ 0.1935, -0.2785]) tensor([-1.8728,  1.8163], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
32 tensor(0.0439, grad_fn=<MseLossBackward0>) tensor([ 0.2726, -0.2964]) tensor([-1.8747,  1.8191], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
32 tensor(0.0323, grad_fn=<MseLossBackward0>) tensor([ 0.1466, -0.2617]) tensor([-1.8775,  1.8221], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
33 tensor(0.0491, grad_fn=<MseLossBackward0>) tensor([ 0.3695, -0.3048]) tensor([-1.8789,  1.8247], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
33 tensor(0.0330

50 tensor(0.0045, grad_fn=<MseLossBackward0>) tensor([ 0.0989, -0.0995]) tensor([-1.9686,  1.9415], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
50 tensor(0.0032, grad_fn=<MseLossBackward0>) tensor([ 0.0391, -0.0921]) tensor([-1.9696,  1.9425], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
50 tensor(0.0036, grad_fn=<MseLossBackward0>) tensor([ 0.0672, -0.0915]) tensor([-1.9700,  1.9434], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
50 tensor(0.0028, grad_fn=<MseLossBackward0>) tensor([ 0.0317, -0.0843]) tensor([-1.9707,  1.9443], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
51 tensor(0.0039, grad_fn=<MseLossBackward0>) tensor([ 0.0917, -0.0932]) tensor([-1.9710,  1.9452], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
51 tensor(0.0028, grad_fn=<MseLossBackward0>) tensor([ 0.0358, -0.0865]) tensor([-1.9719,  1.9461], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
51 tensor(0.0031

70 tensor(0.0002, grad_fn=<MseLossBackward0>) tensor([ 0.0148, -0.0246]) tensor([-1.9936,  1.9846], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
70 tensor(0.0002, grad_fn=<MseLossBackward0>) tensor([ 0.0059, -0.0235]) tensor([-1.9938,  1.9848], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
71 tensor(0.0002, grad_fn=<MseLossBackward0>) tensor([ 0.0202, -0.0249]) tensor([-1.9938,  1.9851], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
71 tensor(0.0002, grad_fn=<MseLossBackward0>) tensor([ 0.0059, -0.0244]) tensor([-1.9940,  1.9853], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
71 tensor(0.0002, grad_fn=<MseLossBackward0>) tensor([ 0.0138, -0.0231]) tensor([-1.9941,  1.9856], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
71 tensor(0.0002, grad_fn=<MseLossBackward0>) tensor([ 0.0054, -0.0220]) tensor([-1.9942,  1.9858], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
72 tensor(0.0002

90 tensor(1.6269e-05, grad_fn=<MseLossBackward0>) tensor([ 0.0034, -0.0066]) tensor([-1.9986,  1.9958], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
90 tensor(1.4295e-05, grad_fn=<MseLossBackward0>) tensor([ 0.0011, -0.0065]) tensor([-1.9986,  1.9959], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
91 tensor(1.6478e-05, grad_fn=<MseLossBackward0>) tensor([ 0.0046, -0.0066]) tensor([-1.9986,  1.9960], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
91 tensor(1.3763e-05, grad_fn=<MseLossBackward0>) tensor([ 0.0010, -0.0067]) tensor([-1.9987,  1.9960], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
91 tensor(1.4245e-05, grad_fn=<MseLossBackward0>) tensor([ 0.0032, -0.0062]) tensor([-1.9987,  1.9961], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
91 tensor(1.2557e-05, grad_fn=<MseLossBackward0>) tensor([ 0.0010, -0.0061]) tensor([-1.9987,  1.9962], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([

In [11]:
parameters_to_vector(model.parameters())

tensor([-1.9993,  1.9978], grad_fn=<CatBackward0>)

In [12]:
class CNN_Net(nn.Module):
    def __init__(self):
        super(CNN_Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5)
        self.pool = nn.MaxPool2d(2,2)
        self.dropout = nn.Dropout(p=0.2)
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.dropout(x)
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        output = self.fc2(x)
        return output

In [22]:
class ClientUpdate(object):
    def __init__(self, dataset, batchSize, alpha, lamda, epochs, projection_list, projected_weights):
        self.train_loader = DataLoader(MyDataset(dataset["features"], dataset["label"]), batch_size=batchSize, shuffle=True)
        #self.learning_rate = learning_rate
        self.epochs = epochs
        self.batchSize = batchSize

    def train(self, model):
        criterion = nn.MSELoss()
        optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, momentum=0.5)

        e_loss = []
        for epoch in range(1, self.epochs+1):
            train_loss = 0
            model.train()
            for i, (data, labels) in zip(range(1), self.train_loader):
                data, labels = data, labels
                optimizer.zero_grad() 
                output = model(data)  
                loss = criterion(output, labels)
                #loss += mu/2 * torch.norm(client_param.data - server_param.data)**2
                loss.backward()
                grads = grads_to_vector(model.parameters())
                #optimizer.step()
                train_loss += loss.item()*data.size(0)
                weights = parameters_to_vector(model.parameters())
                mat_vec_sum = torch.zeros_like(weights)
                for j in G.neighbors(model.user_id):
                    mat_vec_sum = torch.add(mat_vec_sum, torch.matmul(torch.transpose(projection_list[model.user_id][j], 0, 1), 
                                                         projected_weights[j][model.user_id] - projected_weights[model.user_id][j]))
                
                model_update = parameters_to_vector(model.parameters()) - alpha * (grads + lamda * mat_vec_sum)
                
            vector_to_parameters(parameters=model.parameters(), vec=model_update)
                

            train_loss = train_loss/self.batchSize#len(self.train_loader.dataset) 
            e_loss.append(train_loss)

        total_loss = e_loss#sum(e_loss)/len(e_loss)

        return model.state_dict(), total_loss

In [23]:
# Preparing projection matrices
models = [MLP_Net(user_id=i) for i in range(no_users)]
#temp = MLP_Net()
projection_list = []
projected_weights = []

def update_ProjWeight(projection_list, projected_weights, first_run=True):
    #projected_weights = []
    for i in range(no_users):
        neighbors_mat = []
        neighbors_weights = []
        for j in range(no_users):
            if j in G.neighbors(i):
                with torch.no_grad():
                    if first_run == True:
                        row, column = parameters_to_vector(models[j].parameters()).size()[0], parameters_to_vector(models[i].parameters()).size()[0]
                        mat = torch.zeros((row, column))
                        mat.fill_diagonal_(1.0)
                        neighbors_mat.append(mat)
                        neighbors_weights.append(torch.matmul(mat, parameters_to_vector(models[j].parameters())))
                    else:
                        neighbors_weights.append(torch.matmul(projection_list[j][i], parameters_to_vector(models[j].parameters())))
            else:
                neighbors_mat.append(0)
                neighbors_weights.append(0)
        if first_run == True:
            projection_list.append(neighbors_mat)
        projected_weights.append(neighbors_weights)

update_ProjWeight(projection_list, projected_weights)



In [24]:
def testing(model, dataset, bs, criterion): 
    test_loss = 0
    correct = 0
    test_loader = DataLoader(MyDataset(dataset["features"], dataset["label"]), batch_size=bs)
    l = len(test_loader)
    model.eval()
    for data, labels in test_loader:
        data, labels = data, labels
        output = model(data)
        loss = criterion(output, labels)
        test_loss += loss.item()*data.size(0)
        #_, pred = torch.max(output, 1)
        #correct += pred.eq(labels.data.view_as(pred)).sum().item()
    
    test_loss /= len(test_loader.dataset)
    
    return test_loss

In [25]:
model = MLP_Net(user_id=0)

from torch.nn.utils import parameters_to_vector, vector_to_parameters

with torch.no_grad():    
    params = parameters_to_vector(model.parameters())

    print(params)

params *= 2.

vector_to_parameters(parameters=model.parameters(), vec=params)

parameters_to_vector(model.parameters())





tensor([ 0.0944, -0.2440])


tensor([ 0.1888, -0.4880], grad_fn=<CatBackward0>)

In [26]:
#global_model = CNN_Net().cuda()
models = [MLP_Net(user_id=i) for i in range(no_users)]
dummy_models = [MLP_Net(user_id=i) for i in range(no_users)]

#model.load_state_dict(global_model.state_dict())

criterion = nn.MSELoss()


train_loss = []
test_loss = []
test_accuracy = []


for curr_round in tqdm(range(1, it+1)):
    w, local_loss = [], []

    
    for i in range(no_users):
        dummy_models[i].load_state_dict(models[i].state_dict())
        local_update = ClientUpdate(dataset=datapoints[i], batchSize=batch_size, alpha=alpha, lamda=lamda, epochs=1, projection_list=projection_list, projected_weights=projected_weights)
        weights, loss = local_update.train(dummy_models[i])
        w.append(weights)
        local_loss.append(loss)
        models[i].load_state_dict(w[i])
        
    
    
    # Update prjection matrix
    
    #print(projection_list[0], projected_weights[0])
    
    for i in range(no_users):
        weights = parameters_to_vector(models[i].parameters())
        for j in G.neighbors(i):
            weights = parameters_to_vector(model.parameters())
            mat_vec_sum = torch.zeros_like(weights)
            for k in G.neighbors(i):
                 mat_vec_sum = torch.add(mat_vec_sum, torch.matmul(projected_weights[k][i] - projected_weights[i][k],
                                                                  torch.transpose(weights, -1, 0)))
            projection_list[i][j] = torch.add(projection_list[i][j], -1 * eta * lamda * mat_vec_sum)
                                         
    projected_weights = []                                          
    update_ProjWeight(projection_list, projected_weights, first_run=False)
        
        
        
    
    




          
            

    local_test_acc = []
    local_test_loss = []
    for k in range(no_users):
      
      g_loss = testing(models[i], datapoints[i], 50, criterion)
      local_test_loss.append(g_loss)
    
        

    g_loss = sum(local_test_loss) / len(local_test_loss)
    #g_accuracy = sum(local_test_acc) / len(local_test_acc)
    
    

    test_loss.append(g_loss)
    #test_accuracy.append(g_accuracy)
    print("Training_loss %2.5f"% (test_loss[-1]))

  0%|          | 3/1000 [00:00<01:21, 12.30it/s]

Training_loss 8.53738
Training_loss 8.50670
Training_loss 8.48036
Training_loss 8.45132


  1%|          | 7/1000 [00:00<01:14, 13.39it/s]

Training_loss 8.42911
Training_loss 8.39393
Training_loss 8.37661


  1%|          | 9/1000 [00:00<01:16, 12.98it/s]

Training_loss 8.35580
Training_loss 8.30744
Training_loss 8.29708


  1%|▏         | 13/1000 [00:01<01:18, 12.53it/s]

Training_loss 8.25255
Training_loss 8.21282
Training_loss 8.17620


  2%|▏         | 15/1000 [00:01<01:18, 12.58it/s]

Training_loss 8.14515
Training_loss 8.10755
Training_loss 8.06990


  2%|▏         | 17/1000 [00:01<01:23, 11.78it/s]

Training_loss 8.03941
Training_loss 8.00898


  2%|▏         | 21/1000 [00:01<01:32, 10.61it/s]

Training_loss 7.98718
Training_loss 7.96049
Training_loss 7.92490


  2%|▏         | 23/1000 [00:01<01:27, 11.22it/s]

Training_loss 7.89574
Training_loss 7.88056
Training_loss 7.84332


  3%|▎         | 27/1000 [00:02<01:24, 11.45it/s]

Training_loss 7.81505
Training_loss 7.78383
Training_loss 7.75723


  3%|▎         | 29/1000 [00:02<01:37, 10.00it/s]

Training_loss 7.73038
Training_loss 7.68149


  3%|▎         | 31/1000 [00:02<01:41,  9.55it/s]

Training_loss 7.65437
Training_loss 7.62496
Training_loss 7.60399


  3%|▎         | 33/1000 [00:03<01:43,  9.36it/s]

Training_loss 7.58463
Training_loss 7.55685


  4%|▎         | 36/1000 [00:03<01:46,  9.01it/s]

Training_loss 7.52570
Training_loss 7.47987


  4%|▍         | 39/1000 [00:03<01:37,  9.83it/s]

Training_loss 7.45608
Training_loss 7.42661
Training_loss 7.40617


  4%|▍         | 41/1000 [00:03<02:04,  7.68it/s]

Training_loss 7.37081
Training_loss 7.35382


  4%|▍         | 42/1000 [00:04<02:30,  6.38it/s]

Training_loss 7.32298


  4%|▍         | 44/1000 [00:04<02:58,  5.37it/s]

Training_loss 7.30077
Training_loss 7.26723


  5%|▍         | 46/1000 [00:05<03:00,  5.29it/s]

Training_loss 7.24859
Training_loss 7.22545


  5%|▍         | 48/1000 [00:05<02:38,  6.02it/s]

Training_loss 7.19230
Training_loss 7.16160


  5%|▌         | 50/1000 [00:05<02:14,  7.07it/s]

Training_loss 7.13350
Training_loss 7.11602


  5%|▌         | 52/1000 [00:05<02:05,  7.56it/s]

Training_loss 7.09933
Training_loss 7.07748


  5%|▌         | 54/1000 [00:06<01:57,  8.07it/s]

Training_loss 7.04929
Training_loss 7.01933
Training_loss 6.98927


  6%|▌         | 56/1000 [00:06<01:49,  8.60it/s]

Training_loss 6.97184
Training_loss 6.93256
Training_loss 6.91486


  6%|▌         | 59/1000 [00:06<01:52,  8.34it/s]

Training_loss 6.88166


  6%|▌         | 60/1000 [00:06<02:16,  6.87it/s]

Training_loss 6.85551


  6%|▌         | 62/1000 [00:07<02:37,  5.95it/s]

Training_loss 6.83943
Training_loss 6.82066


  6%|▋         | 64/1000 [00:07<02:17,  6.80it/s]

Training_loss 6.79650
Training_loss 6.76668


  7%|▋         | 66/1000 [00:07<02:02,  7.63it/s]

Training_loss 6.74649
Training_loss 6.73180


  7%|▋         | 68/1000 [00:07<01:55,  8.03it/s]

Training_loss 6.69778
Training_loss 6.67856


  7%|▋         | 69/1000 [00:08<01:53,  8.18it/s]

Training_loss 6.65470
Training_loss 6.62453


  7%|▋         | 72/1000 [00:08<01:51,  8.33it/s]

Training_loss 6.60185
Training_loss 6.58558


  8%|▊         | 75/1000 [00:08<01:45,  8.76it/s]

Training_loss 6.56278
Training_loss 6.54685
Training_loss 6.52453


  8%|▊         | 78/1000 [00:09<01:37,  9.49it/s]

Training_loss 6.50587
Training_loss 6.48437
Training_loss 6.45506


  8%|▊         | 80/1000 [00:09<01:32,  9.93it/s]

Training_loss 6.43105
Training_loss 6.40580
Training_loss 6.38230


  8%|▊         | 84/1000 [00:09<01:26, 10.61it/s]

Training_loss 6.36540
Training_loss 6.34804
Training_loss 6.31646


  9%|▊         | 86/1000 [00:09<01:25, 10.72it/s]

Training_loss 6.29378
Training_loss 6.26283
Training_loss 6.22947


  9%|▉         | 90/1000 [00:10<01:21, 11.16it/s]

Training_loss 6.20831
Training_loss 6.17776
Training_loss 6.15770


  9%|▉         | 92/1000 [00:10<01:24, 10.81it/s]

Training_loss 6.14571
Training_loss 6.12926
Training_loss 6.11803


 10%|▉         | 96/1000 [00:10<01:21, 11.08it/s]

Training_loss 6.09636
Training_loss 6.07209
Training_loss 6.05767


 10%|▉         | 98/1000 [00:10<01:17, 11.66it/s]

Training_loss 6.02303
Training_loss 6.00800
Training_loss 5.97793


 10%|█         | 102/1000 [00:11<01:14, 12.04it/s]

Training_loss 5.94989
Training_loss 5.91714
Training_loss 5.90185


 10%|█         | 104/1000 [00:11<01:12, 12.29it/s]

Training_loss 5.87531
Training_loss 5.85292
Training_loss 5.82131


 11%|█         | 108/1000 [00:11<01:16, 11.74it/s]

Training_loss 5.80285
Training_loss 5.78610
Training_loss 5.77175


 11%|█         | 110/1000 [00:11<01:31,  9.74it/s]

Training_loss 5.75488
Training_loss 5.73552


 11%|█         | 112/1000 [00:12<01:40,  8.82it/s]

Training_loss 5.72025
Training_loss 5.70524
Training_loss 5.68869


 11%|█▏        | 114/1000 [00:12<01:31,  9.71it/s]

Training_loss 5.67350
Training_loss 5.64397


 12%|█▏        | 118/1000 [00:12<01:33,  9.40it/s]

Training_loss 5.62777
Training_loss 5.61619
Training_loss 5.59078


 12%|█▏        | 120/1000 [00:13<01:33,  9.42it/s]

Training_loss 5.57487
Training_loss 5.53317


 12%|█▏        | 123/1000 [00:13<01:36,  9.10it/s]

Training_loss 5.51588
Training_loss 5.49856
Training_loss 5.48474


 12%|█▎        | 125/1000 [00:13<01:29,  9.74it/s]

Training_loss 5.45241
Training_loss 5.43245
Training_loss 5.41014


 13%|█▎        | 128/1000 [00:13<01:28,  9.86it/s]

Training_loss 5.38779
Training_loss 5.37018
Training_loss 5.35466


 13%|█▎        | 132/1000 [00:14<01:29,  9.66it/s]

Training_loss 5.32651
Training_loss 5.31332
Training_loss 5.29267


 13%|█▎        | 134/1000 [00:14<01:23, 10.36it/s]

Training_loss 5.27457
Training_loss 5.25281


 14%|█▎        | 136/1000 [00:14<01:30,  9.50it/s]

Training_loss 5.23342
Training_loss 5.22374
Training_loss 5.19104


 14%|█▍        | 140/1000 [00:15<01:20, 10.64it/s]

Training_loss 5.17592
Training_loss 5.14964
Training_loss 5.12748


 14%|█▍        | 142/1000 [00:15<01:26,  9.97it/s]

Training_loss 5.10126
Training_loss 5.07664
Training_loss 5.06335


 15%|█▍        | 146/1000 [00:15<01:22, 10.32it/s]

Training_loss 5.05023
Training_loss 5.02985
Training_loss 5.01196


 15%|█▍        | 148/1000 [00:15<01:16, 11.11it/s]

Training_loss 4.99693
Training_loss 4.97539
Training_loss 4.95703


 15%|█▌        | 152/1000 [00:16<01:16, 11.11it/s]

Training_loss 4.93811
Training_loss 4.91832
Training_loss 4.89343


 15%|█▌        | 154/1000 [00:16<01:18, 10.73it/s]

Training_loss 4.87409
Training_loss 4.84669
Training_loss 4.83016


 16%|█▌        | 156/1000 [00:16<01:20, 10.52it/s]

Training_loss 4.81200
Training_loss 4.80391


 16%|█▌        | 159/1000 [00:16<01:26,  9.68it/s]

Training_loss 4.78093
Training_loss 4.76756
Training_loss 4.75093


 16%|█▋        | 163/1000 [00:17<01:22, 10.20it/s]

Training_loss 4.72602
Training_loss 4.71591
Training_loss 4.69837


 16%|█▋        | 165/1000 [00:17<01:15, 11.05it/s]

Training_loss 4.68528
Training_loss 4.66466
Training_loss 4.65845


 17%|█▋        | 169/1000 [00:17<01:09, 12.00it/s]

Training_loss 4.64493
Training_loss 4.62260
Training_loss 4.60740


 17%|█▋        | 173/1000 [00:18<01:03, 13.07it/s]

Training_loss 4.59683
Training_loss 4.57591
Training_loss 4.55917
Training_loss 4.54084


 18%|█▊        | 175/1000 [00:18<01:02, 13.10it/s]

Training_loss 4.52407
Training_loss 4.50657
Training_loss 4.49480


 18%|█▊        | 179/1000 [00:18<01:01, 13.27it/s]

Training_loss 4.48026
Training_loss 4.47107
Training_loss 4.44775


 18%|█▊        | 181/1000 [00:18<01:00, 13.49it/s]

Training_loss 4.43362
Training_loss 4.40160
Training_loss 4.38572


 18%|█▊        | 183/1000 [00:18<01:02, 13.11it/s]

Training_loss 4.37224
Training_loss 4.35066


 18%|█▊        | 185/1000 [00:19<01:12, 11.20it/s]

Training_loss 4.33194
Training_loss 4.31602


 19%|█▊        | 187/1000 [00:19<01:16, 10.63it/s]

Training_loss 4.29941
Training_loss 4.28842


 19%|█▉        | 191/1000 [00:19<01:21,  9.94it/s]

Training_loss 4.27590
Training_loss 4.26703
Training_loss 4.25187


 19%|█▉        | 193/1000 [00:19<01:21,  9.94it/s]

Training_loss 4.22727
Training_loss 4.21498


 20%|█▉        | 195/1000 [00:20<01:24,  9.48it/s]

Training_loss 4.20184
Training_loss 4.18392
Training_loss 4.17126


 20%|█▉        | 197/1000 [00:20<01:20,  9.99it/s]

Training_loss 4.15698
Training_loss 4.14004


 20%|██        | 200/1000 [00:20<01:25,  9.33it/s]

Training_loss 4.12496
Training_loss 4.11209


 20%|██        | 203/1000 [00:20<01:18, 10.11it/s]

Training_loss 4.09865
Training_loss 4.08815
Training_loss 4.07337


 20%|██        | 205/1000 [00:21<01:26,  9.20it/s]

Training_loss 4.05157
Training_loss 4.03139


 21%|██        | 208/1000 [00:21<01:16, 10.36it/s]

Training_loss 4.01634
Training_loss 4.00108
Training_loss 3.97852


 21%|██        | 210/1000 [00:21<01:16, 10.37it/s]

Training_loss 3.96371
Training_loss 3.95074
Training_loss 3.94439


 21%|██▏       | 214/1000 [00:21<01:12, 10.87it/s]

Training_loss 3.92429
Training_loss 3.91591
Training_loss 3.89893


 22%|██▏       | 216/1000 [00:22<01:16, 10.31it/s]

Training_loss 3.88359
Training_loss 3.87548
Training_loss 3.86478


 22%|██▏       | 220/1000 [00:22<01:08, 11.39it/s]

Training_loss 3.84946
Training_loss 3.83389
Training_loss 3.82314


 22%|██▏       | 222/1000 [00:22<01:13, 10.64it/s]

Training_loss 3.81547
Training_loss 3.80113
Training_loss 3.78195


 22%|██▏       | 224/1000 [00:22<01:14, 10.45it/s]

Training_loss 3.77149
Training_loss 3.75875


 23%|██▎       | 228/1000 [00:23<01:17,  9.92it/s]

Training_loss 3.73809
Training_loss 3.72811
Training_loss 3.71451


 23%|██▎       | 231/1000 [00:23<01:19,  9.70it/s]

Training_loss 3.70356
Training_loss 3.69303
Training_loss 3.67716


 23%|██▎       | 233/1000 [00:23<01:14, 10.28it/s]

Training_loss 3.66496
Training_loss 3.65084
Training_loss 3.63491


 24%|██▎       | 237/1000 [00:24<01:04, 11.82it/s]

Training_loss 3.61525
Training_loss 3.59232
Training_loss 3.57916


 24%|██▍       | 239/1000 [00:24<01:02, 12.26it/s]

Training_loss 3.56000
Training_loss 3.55027
Training_loss 3.53725


 24%|██▍       | 243/1000 [00:24<00:58, 12.93it/s]

Training_loss 3.52885
Training_loss 3.51683
Training_loss 3.50119


 25%|██▍       | 247/1000 [00:24<00:56, 13.33it/s]

Training_loss 3.48762
Training_loss 3.47440
Training_loss 3.46145
Training_loss 3.45099


 25%|██▍       | 249/1000 [00:24<00:55, 13.44it/s]

Training_loss 3.43520
Training_loss 3.42224
Training_loss 3.40742
Training_loss 3.39923


 25%|██▌       | 253/1000 [00:25<00:57, 12.90it/s]

Training_loss 3.38869
Training_loss 3.37902
Training_loss 3.37442


 26%|██▌       | 257/1000 [00:25<01:00, 12.20it/s]

Training_loss 3.35817
Training_loss 3.34366
Training_loss 3.33440


 26%|██▌       | 259/1000 [00:25<00:59, 12.47it/s]

Training_loss 3.32041
Training_loss 3.30846
Training_loss 3.29646


 26%|██▋       | 263/1000 [00:26<00:57, 12.90it/s]

Training_loss 3.28532
Training_loss 3.28052
Training_loss 3.27223


 27%|██▋       | 267/1000 [00:26<00:55, 13.26it/s]

Training_loss 3.25606
Training_loss 3.24297
Training_loss 3.23079
Training_loss 3.21901


 27%|██▋       | 269/1000 [00:26<00:55, 13.15it/s]

Training_loss 3.21066
Training_loss 3.19433
Training_loss 3.17700
Training_loss 3.16442


 27%|██▋       | 273/1000 [00:26<00:55, 13.18it/s]

Training_loss 3.15980
Training_loss 3.14695
Training_loss 3.13620


 28%|██▊       | 277/1000 [00:27<01:00, 12.03it/s]

Training_loss 3.12415
Training_loss 3.11107
Training_loss 3.10170


 28%|██▊       | 279/1000 [00:27<01:03, 11.37it/s]

Training_loss 3.08508
Training_loss 3.07533
Training_loss 3.06494


 28%|██▊       | 283/1000 [00:27<01:06, 10.76it/s]

Training_loss 3.05244
Training_loss 3.04196
Training_loss 3.02971


 28%|██▊       | 285/1000 [00:28<01:06, 10.77it/s]

Training_loss 3.01363
Training_loss 3.00518
Training_loss 2.99092


 29%|██▉       | 289/1000 [00:28<01:00, 11.68it/s]

Training_loss 2.98305
Training_loss 2.97049
Training_loss 2.95995


 29%|██▉       | 291/1000 [00:28<00:58, 12.16it/s]

Training_loss 2.94992
Training_loss 2.93332
Training_loss 2.92484


 30%|██▉       | 295/1000 [00:28<00:55, 12.70it/s]

Training_loss 2.91490
Training_loss 2.91015
Training_loss 2.90139
Training_loss 2.88920


 30%|██▉       | 299/1000 [00:29<00:53, 13.10it/s]

Training_loss 2.88069
Training_loss 2.87264
Training_loss 2.86040
Training_loss 2.85321


 30%|███       | 303/1000 [00:29<00:52, 13.29it/s]

Training_loss 2.84076
Training_loss 2.82790
Training_loss 2.81867


 31%|███       | 307/1000 [00:29<00:52, 13.14it/s]

Training_loss 2.80593
Training_loss 2.79979
Training_loss 2.79129
Training_loss 2.78519


 31%|███       | 311/1000 [00:29<00:50, 13.52it/s]

Training_loss 2.77238
Training_loss 2.76145
Training_loss 2.74983
Training_loss 2.74224


 32%|███▏      | 315/1000 [00:30<00:49, 13.71it/s]

Training_loss 2.73671
Training_loss 2.72842
Training_loss 2.71505
Training_loss 2.69833


 32%|███▏      | 317/1000 [00:30<00:48, 14.10it/s]

Training_loss 2.69144
Training_loss 2.68159
Training_loss 2.67354


 32%|███▏      | 321/1000 [00:30<00:50, 13.34it/s]

Training_loss 2.66481
Training_loss 2.65382
Training_loss 2.64451


 32%|███▎      | 325/1000 [00:30<00:48, 13.97it/s]

Training_loss 2.62613
Training_loss 2.62115
Training_loss 2.61151
Training_loss 2.60235


 33%|███▎      | 327/1000 [00:31<00:49, 13.59it/s]

Training_loss 2.59435
Training_loss 2.58276
Training_loss 2.57081
Training_loss 2.56250


 33%|███▎      | 331/1000 [00:31<00:48, 13.75it/s]

Training_loss 2.55197
Training_loss 2.54165
Training_loss 2.53038


 34%|███▎      | 335/1000 [00:31<00:51, 12.89it/s]

Training_loss 2.52328
Training_loss 2.51473
Training_loss 2.49951


 34%|███▎      | 337/1000 [00:31<00:49, 13.39it/s]

Training_loss 2.49037
Training_loss 2.47732
Training_loss 2.46973


 34%|███▍      | 341/1000 [00:32<00:47, 13.83it/s]

Training_loss 2.46038
Training_loss 2.44918
Training_loss 2.44332
Training_loss 2.43288


 34%|███▍      | 345/1000 [00:32<00:46, 13.95it/s]

Training_loss 2.42495
Training_loss 2.41745
Training_loss 2.40990


 35%|███▍      | 347/1000 [00:32<00:48, 13.53it/s]

Training_loss 2.40234
Training_loss 2.39602
Training_loss 2.38469
Training_loss 2.37806


 35%|███▌      | 351/1000 [00:32<00:47, 13.55it/s]

Training_loss 2.36950
Training_loss 2.36116
Training_loss 2.35454


 36%|███▌      | 355/1000 [00:33<00:52, 12.31it/s]

Training_loss 2.34741
Training_loss 2.33848
Training_loss 2.32763


 36%|███▌      | 357/1000 [00:33<00:56, 11.39it/s]

Training_loss 2.32123
Training_loss 2.31332


 36%|███▌      | 359/1000 [00:33<01:03, 10.08it/s]

Training_loss 2.30283
Training_loss 2.29539


 36%|███▌      | 361/1000 [00:33<01:00, 10.65it/s]

Training_loss 2.28319
Training_loss 2.27377
Training_loss 2.27114


 36%|███▋      | 365/1000 [00:34<00:53, 11.97it/s]

Training_loss 2.26570
Training_loss 2.25068
Training_loss 2.24230


 37%|███▋      | 367/1000 [00:34<00:51, 12.35it/s]

Training_loss 2.23052
Training_loss 2.22366
Training_loss 2.21585


 37%|███▋      | 371/1000 [00:34<00:51, 12.14it/s]

Training_loss 2.20768
Training_loss 2.19786
Training_loss 2.18743


 37%|███▋      | 373/1000 [00:34<00:58, 10.78it/s]

Training_loss 2.18122
Training_loss 2.17115


 38%|███▊      | 375/1000 [00:35<01:00, 10.32it/s]

Training_loss 2.16405
Training_loss 2.15390
Training_loss 2.14855


 38%|███▊      | 379/1000 [00:35<00:56, 10.93it/s]

Training_loss 2.14028
Training_loss 2.13480
Training_loss 2.13005


 38%|███▊      | 381/1000 [00:35<00:55, 11.09it/s]

Training_loss 2.12101
Training_loss 2.11501
Training_loss 2.10948


 38%|███▊      | 385/1000 [00:36<00:57, 10.64it/s]

Training_loss 2.10144
Training_loss 2.09580
Training_loss 2.08999


 39%|███▊      | 387/1000 [00:36<00:56, 10.94it/s]

Training_loss 2.08340
Training_loss 2.07456
Training_loss 2.06526


 39%|███▉      | 391/1000 [00:36<00:55, 11.02it/s]

Training_loss 2.05845
Training_loss 2.05378
Training_loss 2.04645


 39%|███▉      | 393/1000 [00:36<00:53, 11.35it/s]

Training_loss 2.04124
Training_loss 2.02992
Training_loss 2.02312


 40%|███▉      | 397/1000 [00:37<00:51, 11.64it/s]

Training_loss 2.01316
Training_loss 2.00457
Training_loss 1.99637


 40%|███▉      | 399/1000 [00:37<00:53, 11.15it/s]

Training_loss 1.99099
Training_loss 1.98485
Training_loss 1.97582


 40%|████      | 403/1000 [00:37<00:49, 12.11it/s]

Training_loss 1.96777
Training_loss 1.96066
Training_loss 1.95692


 40%|████      | 405/1000 [00:37<00:47, 12.61it/s]

Training_loss 1.95077
Training_loss 1.93988
Training_loss 1.92988


 41%|████      | 409/1000 [00:38<00:46, 12.63it/s]

Training_loss 1.92018
Training_loss 1.91580
Training_loss 1.91000


 41%|████      | 411/1000 [00:38<00:45, 12.95it/s]

Training_loss 1.90352
Training_loss 1.89644
Training_loss 1.88830


 42%|████▏     | 415/1000 [00:38<00:45, 12.90it/s]

Training_loss 1.87867
Training_loss 1.86904
Training_loss 1.86057


 42%|████▏     | 417/1000 [00:38<00:46, 12.64it/s]

Training_loss 1.85111
Training_loss 1.84458
Training_loss 1.83629


 42%|████▏     | 421/1000 [00:38<00:44, 13.00it/s]

Training_loss 1.82948
Training_loss 1.82374
Training_loss 1.82010


 42%|████▏     | 423/1000 [00:39<00:44, 13.09it/s]

Training_loss 1.81487
Training_loss 1.80976
Training_loss 1.80365


 43%|████▎     | 427/1000 [00:39<00:42, 13.55it/s]

Training_loss 1.79919
Training_loss 1.79128
Training_loss 1.78661
Training_loss 1.77929


 43%|████▎     | 431/1000 [00:39<00:41, 13.59it/s]

Training_loss 1.77507
Training_loss 1.76960
Training_loss 1.76372


 43%|████▎     | 433/1000 [00:39<00:45, 12.49it/s]

Training_loss 1.75635
Training_loss 1.74951
Training_loss 1.74506


 44%|████▎     | 437/1000 [00:40<00:44, 12.71it/s]

Training_loss 1.73769
Training_loss 1.73172
Training_loss 1.72710
Training_loss 1.72161


 44%|████▍     | 441/1000 [00:40<00:42, 13.12it/s]

Training_loss 1.71769
Training_loss 1.71217
Training_loss 1.70570


 44%|████▍     | 443/1000 [00:40<00:42, 13.19it/s]

Training_loss 1.69570
Training_loss 1.69105
Training_loss 1.68401


 45%|████▍     | 447/1000 [00:40<00:41, 13.33it/s]

Training_loss 1.67771
Training_loss 1.67399
Training_loss 1.66899


 45%|████▍     | 449/1000 [00:41<00:41, 13.20it/s]

Training_loss 1.65974
Training_loss 1.65414
Training_loss 1.64768


 45%|████▌     | 453/1000 [00:41<00:40, 13.65it/s]

Training_loss 1.64119
Training_loss 1.63541
Training_loss 1.63051


 46%|████▌     | 457/1000 [00:41<00:41, 13.17it/s]

Training_loss 1.62417
Training_loss 1.61911
Training_loss 1.61303
Training_loss 1.60770


 46%|████▌     | 459/1000 [00:41<00:40, 13.28it/s]

Training_loss 1.60192
Training_loss 1.59663
Training_loss 1.58963


 46%|████▋     | 463/1000 [00:42<00:41, 12.81it/s]

Training_loss 1.58487
Training_loss 1.57649
Training_loss 1.56985


 46%|████▋     | 465/1000 [00:42<00:40, 13.23it/s]

Training_loss 1.56668
Training_loss 1.56175
Training_loss 1.55705
Training_loss 1.54997


 47%|████▋     | 469/1000 [00:42<00:40, 13.21it/s]

Training_loss 1.54516
Training_loss 1.54023
Training_loss 1.53623


 47%|████▋     | 473/1000 [00:42<00:39, 13.49it/s]

Training_loss 1.52854
Training_loss 1.52474
Training_loss 1.51813


 48%|████▊     | 475/1000 [00:43<00:39, 13.26it/s]

Training_loss 1.51078
Training_loss 1.50308
Training_loss 1.49669


 48%|████▊     | 479/1000 [00:43<00:38, 13.47it/s]

Training_loss 1.48874
Training_loss 1.48313
Training_loss 1.47364


 48%|████▊     | 481/1000 [00:43<00:38, 13.36it/s]

Training_loss 1.46911
Training_loss 1.46510
Training_loss 1.46162


 48%|████▊     | 485/1000 [00:43<00:38, 13.52it/s]

Training_loss 1.45720
Training_loss 1.45094
Training_loss 1.44581


 49%|████▊     | 487/1000 [00:43<00:39, 13.13it/s]

Training_loss 1.43903
Training_loss 1.43453
Training_loss 1.43054


 49%|████▉     | 491/1000 [00:44<00:40, 12.67it/s]

Training_loss 1.42677
Training_loss 1.42122
Training_loss 1.41407
Training_loss 1.41108


 50%|████▉     | 495/1000 [00:44<00:38, 13.08it/s]

Training_loss 1.40650
Training_loss 1.40026
Training_loss 1.39642


 50%|████▉     | 497/1000 [00:44<00:37, 13.31it/s]

Training_loss 1.39248
Training_loss 1.38989
Training_loss 1.38589


 50%|█████     | 501/1000 [00:45<00:36, 13.62it/s]

Training_loss 1.37958
Training_loss 1.37289
Training_loss 1.36908


 50%|█████     | 503/1000 [00:45<00:37, 13.10it/s]

Training_loss 1.36432
Training_loss 1.36109
Training_loss 1.35675


 51%|█████     | 507/1000 [00:45<00:36, 13.38it/s]

Training_loss 1.35327
Training_loss 1.34850
Training_loss 1.34174


 51%|█████     | 511/1000 [00:45<00:36, 13.58it/s]

Training_loss 1.33460
Training_loss 1.33038
Training_loss 1.32527
Training_loss 1.32137


 52%|█████▏    | 515/1000 [00:46<00:36, 13.45it/s]

Training_loss 1.31670
Training_loss 1.31070
Training_loss 1.30612
Training_loss 1.30237


 52%|█████▏    | 517/1000 [00:46<00:36, 13.23it/s]

Training_loss 1.29862
Training_loss 1.29526
Training_loss 1.29251


 52%|█████▏    | 521/1000 [00:46<00:37, 12.93it/s]

Training_loss 1.28678
Training_loss 1.28245
Training_loss 1.27848


 52%|█████▏    | 523/1000 [00:46<00:36, 13.03it/s]

Training_loss 1.27241
Training_loss 1.26682
Training_loss 1.26366


 53%|█████▎    | 527/1000 [00:46<00:35, 13.32it/s]

Training_loss 1.25933
Training_loss 1.25474
Training_loss 1.24949


 53%|█████▎    | 531/1000 [00:47<00:34, 13.52it/s]

Training_loss 1.24482
Training_loss 1.24188
Training_loss 1.23676
Training_loss 1.23175


 53%|█████▎    | 533/1000 [00:47<00:33, 13.94it/s]

Training_loss 1.22614
Training_loss 1.22295
Training_loss 1.21778


 54%|█████▎    | 537/1000 [00:47<00:33, 13.82it/s]

Training_loss 1.21474
Training_loss 1.20955
Training_loss 1.20409


 54%|█████▍    | 539/1000 [00:47<00:33, 13.73it/s]

Training_loss 1.19840
Training_loss 1.19458
Training_loss 1.18848


 54%|█████▍    | 543/1000 [00:48<00:34, 13.12it/s]

Training_loss 1.18349
Training_loss 1.17687
Training_loss 1.17209


 55%|█████▍    | 545/1000 [00:48<00:34, 13.05it/s]

Training_loss 1.16989
Training_loss 1.16519
Training_loss 1.16074


 55%|█████▍    | 549/1000 [00:48<00:36, 12.42it/s]

Training_loss 1.15640
Training_loss 1.15232
Training_loss 1.14919


 55%|█████▌    | 553/1000 [00:48<00:34, 13.09it/s]

Training_loss 1.14467
Training_loss 1.13834
Training_loss 1.13434
Training_loss 1.12942


 56%|█████▌    | 557/1000 [00:49<00:33, 13.28it/s]

Training_loss 1.12542
Training_loss 1.12324
Training_loss 1.11793
Training_loss 1.11267


 56%|█████▌    | 561/1000 [00:49<00:30, 14.19it/s]

Training_loss 1.11050
Training_loss 1.10631
Training_loss 1.10275
Training_loss 1.09849


 56%|█████▋    | 563/1000 [00:49<00:31, 13.70it/s]

Training_loss 1.09545
Training_loss 1.09187
Training_loss 1.08784


 57%|█████▋    | 567/1000 [00:49<00:30, 14.06it/s]

Training_loss 1.08406
Training_loss 1.07989
Training_loss 1.07642


 57%|█████▋    | 569/1000 [00:50<00:31, 13.49it/s]

Training_loss 1.07000
Training_loss 1.06707
Training_loss 1.06430


 57%|█████▋    | 573/1000 [00:50<00:30, 14.03it/s]

Training_loss 1.06052
Training_loss 1.05883
Training_loss 1.05428


 58%|█████▊    | 577/1000 [00:50<00:32, 13.18it/s]

Training_loss 1.05185
Training_loss 1.04772
Training_loss 1.04492
Training_loss 1.03921


 58%|█████▊    | 579/1000 [00:50<00:31, 13.35it/s]

Training_loss 1.03513
Training_loss 1.03194


 58%|█████▊    | 581/1000 [00:51<00:36, 11.47it/s]

Training_loss 1.02821
Training_loss 1.02597
Training_loss 1.02259


 58%|█████▊    | 585/1000 [00:51<00:33, 12.28it/s]

Training_loss 1.01876
Training_loss 1.01413
Training_loss 1.01199


 59%|█████▊    | 587/1000 [00:51<00:33, 12.49it/s]

Training_loss 1.00851
Training_loss 1.00467
Training_loss 1.00113


 59%|█████▉    | 591/1000 [00:51<00:31, 13.07it/s]

Training_loss 0.99854
Training_loss 0.99596
Training_loss 0.99086


 59%|█████▉    | 593/1000 [00:52<00:33, 12.14it/s]

Training_loss 0.98659
Training_loss 0.98372
Training_loss 0.98073


 60%|█████▉    | 597/1000 [00:52<00:32, 12.23it/s]

Training_loss 0.97789
Training_loss 0.97486
Training_loss 0.97202


 60%|█████▉    | 599/1000 [00:52<00:32, 12.28it/s]

Training_loss 0.96955
Training_loss 0.96632
Training_loss 0.96242


 60%|██████    | 603/1000 [00:52<00:35, 11.13it/s]

Training_loss 0.95947
Training_loss 0.95457
Training_loss 0.95198


 60%|██████    | 605/1000 [00:53<00:33, 11.75it/s]

Training_loss 0.94829
Training_loss 0.94681
Training_loss 0.94340


 61%|██████    | 609/1000 [00:53<00:30, 12.95it/s]

Training_loss 0.94086
Training_loss 0.93856
Training_loss 0.93629


 61%|██████    | 611/1000 [00:53<00:30, 12.93it/s]

Training_loss 0.93284
Training_loss 0.92940
Training_loss 0.92716


 62%|██████▏   | 615/1000 [00:53<00:29, 13.12it/s]

Training_loss 0.92513
Training_loss 0.92179
Training_loss 0.91820


 62%|██████▏   | 617/1000 [00:53<00:28, 13.41it/s]

Training_loss 0.91487
Training_loss 0.91238
Training_loss 0.90994


 62%|██████▏   | 621/1000 [00:54<00:28, 13.47it/s]

Training_loss 0.90742
Training_loss 0.90424
Training_loss 0.90144


 62%|██████▏   | 623/1000 [00:54<00:28, 13.02it/s]

Training_loss 0.89688
Training_loss 0.89399
Training_loss 0.88958
Training_loss 0.88327


 63%|██████▎   | 627/1000 [00:54<00:29, 12.80it/s]

Training_loss 0.87991
Training_loss 0.87768
Training_loss 0.87525


 63%|██████▎   | 631/1000 [00:55<00:28, 13.00it/s]

Training_loss 0.87173
Training_loss 0.86795
Training_loss 0.86419


 64%|██████▎   | 635/1000 [00:55<00:27, 13.45it/s]

Training_loss 0.86127
Training_loss 0.85856
Training_loss 0.85537
Training_loss 0.85098


 64%|██████▍   | 639/1000 [00:55<00:26, 13.77it/s]

Training_loss 0.84710
Training_loss 0.84503
Training_loss 0.84258
Training_loss 0.84076


 64%|██████▍   | 641/1000 [00:55<00:26, 13.48it/s]

Training_loss 0.83868
Training_loss 0.83595
Training_loss 0.83335


 64%|██████▍   | 645/1000 [00:56<00:28, 12.28it/s]

Training_loss 0.82998
Training_loss 0.82730
Training_loss 0.82360


 65%|██████▍   | 647/1000 [00:56<00:29, 11.88it/s]

Training_loss 0.81964
Training_loss 0.81702
Training_loss 0.81528


 65%|██████▌   | 651/1000 [00:56<00:28, 12.08it/s]

Training_loss 0.81295
Training_loss 0.81013
Training_loss 0.80777


 65%|██████▌   | 653/1000 [00:56<00:28, 12.08it/s]

Training_loss 0.80559
Training_loss 0.80231
Training_loss 0.80025


 66%|██████▌   | 657/1000 [00:57<00:27, 12.58it/s]

Training_loss 0.79827
Training_loss 0.79587
Training_loss 0.79379


 66%|██████▌   | 659/1000 [00:57<00:26, 12.98it/s]

Training_loss 0.79103
Training_loss 0.78787
Training_loss 0.78580


 66%|██████▋   | 663/1000 [00:57<00:25, 13.24it/s]

Training_loss 0.78370
Training_loss 0.78104
Training_loss 0.77889


 67%|██████▋   | 667/1000 [00:57<00:24, 13.51it/s]

Training_loss 0.77716
Training_loss 0.77452
Training_loss 0.77164
Training_loss 0.77009


 67%|██████▋   | 671/1000 [00:58<00:23, 13.71it/s]

Training_loss 0.76698
Training_loss 0.76487
Training_loss 0.76122
Training_loss 0.75913


 67%|██████▋   | 673/1000 [00:58<00:23, 13.84it/s]

Training_loss 0.75681
Training_loss 0.75378
Training_loss 0.75217


 68%|██████▊   | 677/1000 [00:58<00:23, 14.01it/s]

Training_loss 0.74887
Training_loss 0.74606
Training_loss 0.74463


 68%|██████▊   | 679/1000 [00:58<00:23, 13.84it/s]

Training_loss 0.74234
Training_loss 0.73871
Training_loss 0.73609


 68%|██████▊   | 683/1000 [00:59<00:24, 13.10it/s]

Training_loss 0.73291
Training_loss 0.72982
Training_loss 0.72644


 68%|██████▊   | 685/1000 [00:59<00:24, 12.86it/s]

Training_loss 0.72450
Training_loss 0.72243
Training_loss 0.71803


 69%|██████▉   | 689/1000 [00:59<00:23, 13.18it/s]

Training_loss 0.71661
Training_loss 0.71226
Training_loss 0.71042


 69%|██████▉   | 693/1000 [00:59<00:22, 13.75it/s]

Training_loss 0.70828
Training_loss 0.70626
Training_loss 0.70440
Training_loss 0.70162


 70%|██████▉   | 695/1000 [00:59<00:22, 13.62it/s]

Training_loss 0.69959
Training_loss 0.69700
Training_loss 0.69431


 70%|██████▉   | 699/1000 [01:00<00:22, 13.38it/s]

Training_loss 0.69119
Training_loss 0.68873
Training_loss 0.68599


 70%|███████   | 703/1000 [01:00<00:21, 13.61it/s]

Training_loss 0.68300
Training_loss 0.68018
Training_loss 0.67828
Training_loss 0.67556


 70%|███████   | 705/1000 [01:00<00:21, 13.50it/s]

Training_loss 0.67354
Training_loss 0.67161
Training_loss 0.66919
Training_loss 0.66599


 71%|███████   | 709/1000 [01:00<00:21, 13.49it/s]

Training_loss 0.66269
Training_loss 0.66103
Training_loss 0.65902


 71%|███████▏  | 713/1000 [01:01<00:21, 13.12it/s]

Training_loss 0.65677
Training_loss 0.65534
Training_loss 0.65351


 72%|███████▏  | 715/1000 [01:01<00:21, 13.13it/s]

Training_loss 0.65055
Training_loss 0.64896
Training_loss 0.64693


 72%|███████▏  | 719/1000 [01:01<00:20, 13.59it/s]

Training_loss 0.64402
Training_loss 0.64233
Training_loss 0.64073


 72%|███████▏  | 721/1000 [01:01<00:20, 13.60it/s]

Training_loss 0.63798
Training_loss 0.63566
Training_loss 0.63466


 72%|███████▎  | 725/1000 [01:02<00:20, 13.68it/s]

Training_loss 0.63265
Training_loss 0.63068
Training_loss 0.62824


 73%|███████▎  | 727/1000 [01:02<00:21, 12.97it/s]

Training_loss 0.62640
Training_loss 0.62344
Training_loss 0.62141


 73%|███████▎  | 731/1000 [01:02<00:20, 12.91it/s]

Training_loss 0.61884
Training_loss 0.61629
Training_loss 0.61399


 73%|███████▎  | 733/1000 [01:02<00:20, 12.96it/s]

Training_loss 0.61250
Training_loss 0.61047
Training_loss 0.60731


 74%|███████▎  | 737/1000 [01:03<00:19, 13.16it/s]

Training_loss 0.60592
Training_loss 0.60418
Training_loss 0.60183


 74%|███████▍  | 739/1000 [01:03<00:20, 12.94it/s]

Training_loss 0.59974
Training_loss 0.59694
Training_loss 0.59549


 74%|███████▍  | 743/1000 [01:03<00:19, 13.32it/s]

Training_loss 0.59321
Training_loss 0.59117
Training_loss 0.58800


 74%|███████▍  | 745/1000 [01:03<00:18, 13.51it/s]

Training_loss 0.58661
Training_loss 0.58480
Training_loss 0.58181


 75%|███████▍  | 749/1000 [01:03<00:17, 14.15it/s]

Training_loss 0.57895
Training_loss 0.57678
Training_loss 0.57408


 75%|███████▌  | 751/1000 [01:04<00:19, 12.74it/s]

Training_loss 0.57229
Training_loss 0.57048
Training_loss 0.56848


 76%|███████▌  | 755/1000 [01:04<00:19, 12.76it/s]

Training_loss 0.56642
Training_loss 0.56542
Training_loss 0.56376


 76%|███████▌  | 759/1000 [01:04<00:17, 13.41it/s]

Training_loss 0.56088
Training_loss 0.55793
Training_loss 0.55562
Training_loss 0.55400


 76%|███████▋  | 763/1000 [01:04<00:17, 13.89it/s]

Training_loss 0.55227
Training_loss 0.55128
Training_loss 0.54952
Training_loss 0.54808


 76%|███████▋  | 765/1000 [01:05<00:18, 13.03it/s]

Training_loss 0.54652
Training_loss 0.54458
Training_loss 0.54282


 77%|███████▋  | 769/1000 [01:05<00:17, 12.90it/s]

Training_loss 0.54041
Training_loss 0.53821
Training_loss 0.53538


 77%|███████▋  | 771/1000 [01:05<00:17, 13.31it/s]

Training_loss 0.53318
Training_loss 0.53108
Training_loss 0.52960


 78%|███████▊  | 775/1000 [01:05<00:16, 13.39it/s]

Training_loss 0.52813
Training_loss 0.52693
Training_loss 0.52540


 78%|███████▊  | 777/1000 [01:06<00:16, 13.25it/s]

Training_loss 0.52422
Training_loss 0.52231
Training_loss 0.52001


 78%|███████▊  | 781/1000 [01:06<00:16, 13.45it/s]

Training_loss 0.51854
Training_loss 0.51559
Training_loss 0.51386


 78%|███████▊  | 783/1000 [01:06<00:16, 13.08it/s]

Training_loss 0.51277
Training_loss 0.51045
Training_loss 0.50896


 79%|███████▊  | 787/1000 [01:06<00:16, 13.30it/s]

Training_loss 0.50708
Training_loss 0.50540
Training_loss 0.50370


 79%|███████▉  | 789/1000 [01:06<00:15, 13.53it/s]

Training_loss 0.50135
Training_loss 0.49981
Training_loss 0.49874
Training_loss 0.49767


 79%|███████▉  | 793/1000 [01:07<00:15, 13.31it/s]

Training_loss 0.49683
Training_loss 0.49516
Training_loss 0.49390


 80%|███████▉  | 795/1000 [01:07<00:15, 13.38it/s]

Training_loss 0.49070
Training_loss 0.48946


 80%|███████▉  | 799/1000 [01:07<00:16, 12.23it/s]

Training_loss 0.48774
Training_loss 0.48669
Training_loss 0.48439


 80%|████████  | 801/1000 [01:07<00:16, 11.73it/s]

Training_loss 0.48268
Training_loss 0.48104
Training_loss 0.47919


 80%|████████  | 805/1000 [01:08<00:16, 11.86it/s]

Training_loss 0.47814
Training_loss 0.47620
Training_loss 0.47482


 81%|████████  | 807/1000 [01:08<00:16, 12.00it/s]

Training_loss 0.47339
Training_loss 0.47166
Training_loss 0.46962


 81%|████████  | 811/1000 [01:08<00:16, 11.77it/s]

Training_loss 0.46829
Training_loss 0.46617
Training_loss 0.46526


 81%|████████▏ | 813/1000 [01:09<00:15, 11.77it/s]

Training_loss 0.46427
Training_loss 0.46272
Training_loss 0.46131


 82%|████████▏ | 817/1000 [01:09<00:14, 12.57it/s]

Training_loss 0.46035
Training_loss 0.45923
Training_loss 0.45790


 82%|████████▏ | 819/1000 [01:09<00:14, 12.44it/s]

Training_loss 0.45604
Training_loss 0.45451
Training_loss 0.45282


 82%|████████▏ | 823/1000 [01:09<00:13, 12.88it/s]

Training_loss 0.45177
Training_loss 0.44908
Training_loss 0.44755


 82%|████████▎ | 825/1000 [01:09<00:13, 12.94it/s]

Training_loss 0.44548
Training_loss 0.44263
Training_loss 0.44193


 83%|████████▎ | 829/1000 [01:10<00:12, 13.21it/s]

Training_loss 0.43936
Training_loss 0.43814
Training_loss 0.43696


 83%|████████▎ | 833/1000 [01:10<00:12, 13.36it/s]

Training_loss 0.43556
Training_loss 0.43426
Training_loss 0.43285
Training_loss 0.43103


 84%|████████▎ | 835/1000 [01:10<00:12, 13.53it/s]

Training_loss 0.43002
Training_loss 0.42932
Training_loss 0.42795


 84%|████████▍ | 839/1000 [01:10<00:11, 13.58it/s]

Training_loss 0.42652
Training_loss 0.42570
Training_loss 0.42333


 84%|████████▍ | 841/1000 [01:11<00:11, 13.26it/s]

Training_loss 0.42217
Training_loss 0.42099
Training_loss 0.41914


 84%|████████▍ | 845/1000 [01:11<00:11, 13.43it/s]

Training_loss 0.41763
Training_loss 0.41631
Training_loss 0.41441


 85%|████████▍ | 847/1000 [01:11<00:11, 13.07it/s]

Training_loss 0.41289
Training_loss 0.41173
Training_loss 0.41071


 85%|████████▌ | 851/1000 [01:11<00:11, 12.43it/s]

Training_loss 0.40949
Training_loss 0.40803
Training_loss 0.40653


 85%|████████▌ | 853/1000 [01:12<00:12, 11.35it/s]

Training_loss 0.40503
Training_loss 0.40387


 86%|████████▌ | 855/1000 [01:12<00:14,  9.82it/s]

Training_loss 0.40315
Training_loss 0.40148


 86%|████████▌ | 857/1000 [01:12<00:15,  9.04it/s]

Training_loss 0.40070
Training_loss 0.39953


 86%|████████▌ | 860/1000 [01:12<00:13, 10.21it/s]

Training_loss 0.39829
Training_loss 0.39735
Training_loss 0.39600


 86%|████████▌ | 862/1000 [01:13<00:12, 10.95it/s]

Training_loss 0.39386
Training_loss 0.39287
Training_loss 0.39137


 87%|████████▋ | 866/1000 [01:13<00:11, 11.91it/s]

Training_loss 0.39051
Training_loss 0.38866
Training_loss 0.38702


 87%|████████▋ | 868/1000 [01:13<00:10, 12.42it/s]

Training_loss 0.38499
Training_loss 0.38359
Training_loss 0.38205


 87%|████████▋ | 872/1000 [01:13<00:09, 12.86it/s]

Training_loss 0.38039
Training_loss 0.37907
Training_loss 0.37754


 87%|████████▋ | 874/1000 [01:13<00:09, 12.79it/s]

Training_loss 0.37687
Training_loss 0.37517
Training_loss 0.37424


 88%|████████▊ | 876/1000 [01:14<00:10, 11.99it/s]

Training_loss 0.37263
Training_loss 0.37164


 88%|████████▊ | 880/1000 [01:14<00:11, 10.75it/s]

Training_loss 0.37057
Training_loss 0.36939
Training_loss 0.36789


 88%|████████▊ | 884/1000 [01:14<00:09, 11.64it/s]

Training_loss 0.36673
Training_loss 0.36559
Training_loss 0.36461
Training_loss 0.36373


 89%|████████▊ | 886/1000 [01:15<00:09, 12.05it/s]

Training_loss 0.36239
Training_loss 0.36140
Training_loss 0.35983


 89%|████████▉ | 890/1000 [01:15<00:08, 12.77it/s]

Training_loss 0.35909
Training_loss 0.35777
Training_loss 0.35671


 89%|████████▉ | 892/1000 [01:15<00:08, 12.65it/s]

Training_loss 0.35519
Training_loss 0.35433
Training_loss 0.35301


 90%|████████▉ | 896/1000 [01:15<00:08, 12.70it/s]

Training_loss 0.35223
Training_loss 0.35055
Training_loss 0.34914


 90%|████████▉ | 898/1000 [01:15<00:08, 12.44it/s]

Training_loss 0.34797
Training_loss 0.34686
Training_loss 0.34606


 90%|█████████ | 902/1000 [01:16<00:07, 12.64it/s]

Training_loss 0.34538
Training_loss 0.34456
Training_loss 0.34294


 90%|█████████ | 904/1000 [01:16<00:07, 12.45it/s]

Training_loss 0.34180
Training_loss 0.34006
Training_loss 0.33904


 91%|█████████ | 908/1000 [01:16<00:07, 12.13it/s]

Training_loss 0.33803
Training_loss 0.33684
Training_loss 0.33610


 91%|█████████ | 910/1000 [01:16<00:07, 12.42it/s]

Training_loss 0.33482
Training_loss 0.33333
Training_loss 0.33211


 91%|█████████▏| 914/1000 [01:17<00:06, 13.14it/s]

Training_loss 0.33040
Training_loss 0.32946
Training_loss 0.32780


 92%|█████████▏| 916/1000 [01:17<00:06, 12.88it/s]

Training_loss 0.32665
Training_loss 0.32516
Training_loss 0.32389


 92%|█████████▏| 920/1000 [01:17<00:06, 12.72it/s]

Training_loss 0.32231
Training_loss 0.32119
Training_loss 0.32027


 92%|█████████▏| 922/1000 [01:17<00:06, 12.11it/s]

Training_loss 0.31890
Training_loss 0.31795
Training_loss 0.31743


 93%|█████████▎| 926/1000 [01:18<00:05, 12.35it/s]

Training_loss 0.31653
Training_loss 0.31517
Training_loss 0.31451


 93%|█████████▎| 928/1000 [01:18<00:05, 12.30it/s]

Training_loss 0.31332
Training_loss 0.31167
Training_loss 0.31016


 93%|█████████▎| 932/1000 [01:18<00:05, 12.45it/s]

Training_loss 0.30902
Training_loss 0.30807
Training_loss 0.30738


 93%|█████████▎| 934/1000 [01:18<00:05, 11.72it/s]

Training_loss 0.30620
Training_loss 0.30458
Training_loss 0.30358


 94%|█████████▍| 938/1000 [01:19<00:05, 11.89it/s]

Training_loss 0.30250
Training_loss 0.30107
Training_loss 0.30036


 94%|█████████▍| 940/1000 [01:19<00:05, 11.95it/s]

Training_loss 0.29916
Training_loss 0.29800
Training_loss 0.29738


 94%|█████████▍| 944/1000 [01:19<00:04, 11.89it/s]

Training_loss 0.29613
Training_loss 0.29533
Training_loss 0.29419


 95%|█████████▍| 946/1000 [01:19<00:04, 11.96it/s]

Training_loss 0.29260
Training_loss 0.29083
Training_loss 0.29002


 95%|█████████▌| 950/1000 [01:20<00:04, 11.98it/s]

Training_loss 0.28956
Training_loss 0.28871
Training_loss 0.28743


 95%|█████████▌| 952/1000 [01:20<00:03, 12.02it/s]

Training_loss 0.28624
Training_loss 0.28513
Training_loss 0.28427


 96%|█████████▌| 956/1000 [01:20<00:03, 11.79it/s]

Training_loss 0.28345
Training_loss 0.28296
Training_loss 0.28195


 96%|█████████▌| 958/1000 [01:20<00:03, 11.77it/s]

Training_loss 0.28108
Training_loss 0.27993
Training_loss 0.27892


 96%|█████████▌| 962/1000 [01:21<00:03, 12.22it/s]

Training_loss 0.27776
Training_loss 0.27733
Training_loss 0.27643


 96%|█████████▋| 964/1000 [01:21<00:02, 12.46it/s]

Training_loss 0.27524
Training_loss 0.27418
Training_loss 0.27266


 97%|█████████▋| 968/1000 [01:21<00:02, 12.55it/s]

Training_loss 0.27181
Training_loss 0.27052
Training_loss 0.26892


 97%|█████████▋| 970/1000 [01:21<00:02, 11.91it/s]

Training_loss 0.26816
Training_loss 0.26723
Training_loss 0.26647


 97%|█████████▋| 974/1000 [01:22<00:02, 12.69it/s]

Training_loss 0.26532
Training_loss 0.26423
Training_loss 0.26335


 98%|█████████▊| 976/1000 [01:22<00:01, 12.86it/s]

Training_loss 0.26259
Training_loss 0.26140
Training_loss 0.26075


 98%|█████████▊| 980/1000 [01:22<00:01, 12.54it/s]

Training_loss 0.25989
Training_loss 0.25916
Training_loss 0.25859


 98%|█████████▊| 982/1000 [01:22<00:01, 12.85it/s]

Training_loss 0.25725
Training_loss 0.25666
Training_loss 0.25595


 99%|█████████▊| 986/1000 [01:23<00:01, 12.52it/s]

Training_loss 0.25488
Training_loss 0.25418
Training_loss 0.25341


 99%|█████████▉| 988/1000 [01:23<00:00, 12.87it/s]

Training_loss 0.25273
Training_loss 0.25199
Training_loss 0.25130
Training_loss 0.24969


 99%|█████████▉| 994/1000 [01:23<00:00, 13.53it/s]

Training_loss 0.24859
Training_loss 0.24777
Training_loss 0.24628
Training_loss 0.24543


100%|█████████▉| 996/1000 [01:23<00:00, 12.34it/s]

Training_loss 0.24456
Training_loss 0.24355
Training_loss 0.24294


100%|██████████| 1000/1000 [01:24<00:00, 11.87it/s]

Training_loss 0.24202
Training_loss 0.24087
Training_loss 0.23976





In [None]:
plot.plot()

In [None]:
fig, ax = plt.subplots()
x_axis = np.arange(1, T+1)
y_axis = np.array(test_accuracy)
ax.plot(x_axis, y_axis)

ax.set(xlabel='Number of Rounds', ylabel='Test Accuracy')
ax.grid()