In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Dataset
import copy
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import networkx as nx
from torch.nn.utils import parameters_to_vector, vector_to_parameters

In [2]:
# Graph implementation
def generate_graph(cluster_sizes=[100,100], pin=0.5, pout=0.01, seed=0):
    """Generate a random connected graph"""
    probs = np.array([[pin, pout],[pout, pin]])
    while True:
        g = nx.stochastic_block_model(cluster_sizes, probs)
        if nx.algorithms.components.is_connected(g):
            return g


cluster_sizes = [10, 10]
pin = 0.5
#pout = 0.01
pout = 0.4
seed = 0
alpha = 1e-3
lamda = 1e-3
eta = 1e-3
no_users = sum(cluster_sizes)
batch_size = 20
epochs = 1
it = 2000
G = generate_graph(cluster_sizes, pin, pout, seed)

#nx.draw(G, with_labels=True, node_size=100, alpha=1, linewidths=10)
#plt.show()

In [3]:
# Metropolis weights 
number_nodes = G.number_of_nodes()
weights = np.zeros([number_nodes, number_nodes])
for edge in G.edges():
  i, j = edge[0], edge[1]
  weights[i - 1][j - 1] = 1 / (1 + np.max([G.degree(i), G.degree(j)]))
  weights[j - 1][i - 1] = weights[i - 1][j - 1]

print(weights)

weights = weights + np.diag(1 - np.sum(weights, axis=0))

metropolis_weights = weights
print(metropolis_weights)


[[0.         0.         0.         0.125      0.07692308 0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.14285714 0.         0.125      0.
  0.         0.        ]
 [0.         0.         0.         0.125      0.07692308 0.
  0.         0.125      0.125      0.         0.         0.
  0.07142857 0.         0.         0.         0.         0.1
  0.         0.        ]
 [0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.07142857 0.1        0.         0.14285714 0.         0.
  0.         0.16666667]
 [0.125      0.125      0.         0.         0.         0.125
  0.1        0.         0.125      0.         0.         0.
  0.07142857 0.         0.125      0.         0.         0.
  0.         0.        ]
 [0.07692308 0.07692308 0.         0.         0.         0.07692308
  0.07692308 0.07692308 0.07692308 0.07692308 0.07692308 0.
  0.07142857 0.         0.         0.         0.

In [4]:
def load_dataset():
    transforms_mnist = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,),(0.3081,))])
    mnist_data_train = datasets.MNIST('./data/mnist', train=True, download=True, transform=transforms_mnist)
    mnist_data_test = datasets.MNIST('./data/mnist', train=False, download=True, transform=transforms_mnist)

    return mnist_data_train, mnist_data_test

In [5]:
def degrees(A):
    """Return the degrees of each node of a graph from its adjacency matrix"""
    return np.sum(A, axis=0).reshape(A.shape[0], 1)

def node_degree(n, G):
    cnt = 0
    for i in G.neighbors(n):
        cnt += 1
    return cnt

def get_neighbors(n, G):
    neighbors_list = []
    for i in G.neighbors(n):
        neighbors_list.append(int(i))
    return neighbors_list

In [6]:
datapoints = {}
count = 0
W1 = np.array([2.0, 2.0, 3.0, 3.0])
W2 = np.array([-2.0, 2.0, 3.0, -3.0])
W3 = 2 * W1
W4 = 2  * W2
W = [W1, W2]
m = 200
n = 4
noise_sd = 0.001
for i, cluster_size in enumerate(cluster_sizes):
    for j in range(cluster_size):
        features = np.random.normal(loc=0.0, scale=1.0, size=(m, n))
        label = np.dot(features, W[i ]) + np.random.normal(0,noise_sd)
        datapoints[count] = {
                'features': features,
                'degree': node_degree(count, G),
                'label': label,
                'neighbors': get_neighbors(count, G),
                'exact_weights': torch.from_numpy(W[i])
            }
        count += 1

In [7]:
class MyDataset(Dataset):
    def __init__(self, data, targets, transform=None):
        self.data = torch.FloatTensor(data)
        self.targets = torch.FloatTensor(targets).unsqueeze(-1)
        
    def __getitem__(self, index):
        x = self.data[index]
        y = self.targets[index]

        return x, y
    
    def __len__(self):
        return len(self.data)


In [8]:
class MLP_Net(nn.Module):
    def __init__(self, user_id):
        super(MLP_Net, self).__init__()
        self.fc1 = nn.Linear(4, 1, bias=False)
        #self.fc2 = nn.Linear(2, 1, bias=False)
        #self.fc3 = nn.Linear(200, 10)
        self.user_id = user_id

    def forward(self, x):
        x = torch.flatten(x, 1)
        #x = F.relu(self.fc1(x))
        output = self.fc1(x)
        #output = self.fc3(x)
        return output

In [9]:
from typing import Iterable, Optional

def grads_to_vector(parameters: Iterable[torch.Tensor]) -> torch.Tensor:
    r"""Convert parameters to one vector

    Args:
        parameters (Iterable[Tensor]): an iterator of Tensors that are the
            parameters of a model.

    Returns:
        The parameters represented by a single vector
    """
    # Flag for the device where the parameter is located
    param_device = None

    vec = []
    for param in parameters:
        # Ensure the parameters are located in the same device
        param_device = param.grad

        vec.append(param_device.view(-1))
    return torch.cat(vec)

In [10]:
model = MLP_Net(user_id=0)

lr = 0.01

dataloader = DataLoader(MyDataset(datapoints[19]["features"], datapoints[19]["label"]), batch_size=50, shuffle=False)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
for i in range(100):
    for (x, y) in dataloader:
        criterion = nn.MSELoss()
        optimizer.zero_grad()
        yhat = model(x)
        print(y.size())
        print(yhat.size())
        loss = criterion(yhat, y)
        
        loss.backward()
        print(i, loss, grads_to_vector(model.parameters()), parameters_to_vector(model.parameters()))
        #optimizer.step()
        new_model = parameters_to_vector(model.parameters()) - lr * grads_to_vector(model.parameters())
        vector_to_parameters(parameters=model.parameters(), vec=new_model)
        #if i % 50 ==0:
            #lr *= 0.9
            

#parameters_to_vector(model.parameters())

torch.Size([50, 1])
torch.Size([50, 1])
0 tensor(35.6923, grad_fn=<MseLossBackward0>) tensor([  4.5316,  -4.0697, -10.5881,   6.9526]) tensor([-0.2369, -0.2935, -0.3029, -0.2542], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
0 tensor(32.2348, grad_fn=<MseLossBackward0>) tensor([  4.0152,  -3.7878, -10.0506,   6.3183]) tensor([-0.2822, -0.2528, -0.1970, -0.3237], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
0 tensor(23.0703, grad_fn=<MseLossBackward0>) tensor([ 2.8854, -4.2088, -7.0069,  3.9343]) tensor([-0.3223, -0.2149, -0.0965, -0.3869], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
0 tensor(30.5892, grad_fn=<MseLossBackward0>) tensor([ 5.0570, -7.5637, -7.5314,  5.2876]) tensor([-0.3512, -0.1728, -0.0264, -0.4263], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
1 tensor(29.1874, grad_fn=<MseLossBackward0>) tensor([ 4.0955, -3.7102, -9.5116,  6.3387]) tensor([-0.4018, -0.0972,  0.0489, -0.4791], grad_fn=<CatBack

18 tensor(0.9216, grad_fn=<MseLossBackward0>) tensor([ 0.6344, -0.7264, -1.4123,  1.2723]) tensor([-1.6962,  1.5274,  2.6229, -2.3909], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
18 tensor(0.6882, grad_fn=<MseLossBackward0>) tensor([ 0.4715, -0.7948, -0.9023,  0.9034]) tensor([-1.7025,  1.5347,  2.6370, -2.4036], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
18 tensor(0.9987, grad_fn=<MseLossBackward0>) tensor([ 0.9387, -1.5383, -0.9229,  1.1774]) tensor([-1.7073,  1.5427,  2.6461, -2.4127], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
19 tensor(0.8514, grad_fn=<MseLossBackward0>) tensor([ 0.6761, -0.7350, -1.3240,  1.2685]) tensor([-1.7166,  1.5580,  2.6553, -2.4244], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
19 tensor(0.7614, grad_fn=<MseLossBackward0>) tensor([ 0.5729, -0.6644, -1.2626,  1.1677]) tensor([-1.7234,  1.5654,  2.6685, -2.4371], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])


38 tensor(0.0253, grad_fn=<MseLossBackward0>) tensor([ 0.0843, -0.1293, -0.1323,  0.2451]) tensor([-1.9505,  1.9059,  2.9880, -2.8668], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
38 tensor(0.0215, grad_fn=<MseLossBackward0>) tensor([ 0.0669, -0.1395, -0.0577,  0.1992]) tensor([-1.9514,  1.9072,  2.9893, -2.8693], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
38 tensor(0.0340, grad_fn=<MseLossBackward0>) tensor([ 0.1632, -0.2971, -0.0429,  0.2526]) tensor([-1.9520,  1.9086,  2.9899, -2.8713], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
39 tensor(0.0242, grad_fn=<MseLossBackward0>) tensor([ 0.0981, -0.1361, -0.1205,  0.2426]) tensor([-1.9537,  1.9116,  2.9903, -2.8738], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
39 tensor(0.0214, grad_fn=<MseLossBackward0>) tensor([ 0.0763, -0.1190, -0.1162,  0.2266]) tensor([-1.9546,  1.9129,  2.9915, -2.8762], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])


57 tensor(0.0014, grad_fn=<MseLossBackward0>) tensor([ 0.0133, -0.0281, -0.0049,  0.0578]) tensor([-1.9899,  1.9775,  3.0083, -2.9657], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
57 tensor(0.0013, grad_fn=<MseLossBackward0>) tensor([ 0.0116, -0.0302,  0.0080,  0.0517]) tensor([-1.9900,  1.9778,  3.0084, -2.9662], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
57 tensor(0.0021, grad_fn=<MseLossBackward0>) tensor([ 0.0352, -0.0700,  0.0145,  0.0647]) tensor([-1.9902,  1.9781,  3.0083, -2.9667], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
58 tensor(0.0013, grad_fn=<MseLossBackward0>) tensor([ 0.0177, -0.0304, -0.0036,  0.0568]) tensor([-1.9905,  1.9788,  3.0081, -2.9674], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
58 tensor(0.0012, grad_fn=<MseLossBackward0>) tensor([ 0.0121, -0.0260, -0.0035,  0.0537]) tensor([-1.9907,  1.9791,  3.0082, -2.9680], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])


torch.Size([50, 1])
torch.Size([50, 1])
76 tensor(0.0002, grad_fn=<MseLossBackward0>) tensor([ 0.0083, -0.0181,  0.0077,  0.0178]) tensor([-1.9978,  1.9944,  3.0039, -2.9910], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
77 tensor(9.4312e-05, grad_fn=<MseLossBackward0>) tensor([ 0.0038, -0.0072,  0.0032,  0.0144]) tensor([-1.9978,  1.9946,  3.0038, -2.9911], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
77 tensor(8.9988e-05, grad_fn=<MseLossBackward0>) tensor([ 0.0023, -0.0062,  0.0036,  0.0138]) tensor([-1.9979,  1.9947,  3.0038, -2.9913], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
77 tensor(9.1646e-05, grad_fn=<MseLossBackward0>) tensor([ 0.0021, -0.0069,  0.0057,  0.0132]) tensor([-1.9979,  1.9947,  3.0038, -2.9914], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
77 tensor(0.0001, grad_fn=<MseLossBackward0>) tensor([ 0.0077, -0.0169,  0.0073,  0.0167]) tensor([-1.9979,  1.9948,  3.0037, -2.9916], grad_fn=<Cat

96 tensor(9.8336e-06, grad_fn=<MseLossBackward0>) tensor([ 0.0004, -0.0020,  0.0022,  0.0037]) tensor([-1.9995,  1.9987,  3.0012, -2.9976], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
96 tensor(1.3875e-05, grad_fn=<MseLossBackward0>) tensor([ 0.0018, -0.0048,  0.0022,  0.0050]) tensor([-1.9995,  1.9987,  3.0012, -2.9977], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
97 tensor(7.6932e-06, grad_fn=<MseLossBackward0>) tensor([ 0.0010, -0.0014,  0.0013,  0.0034]) tensor([-1.9995,  1.9988,  3.0012, -2.9977], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
97 tensor(8.7373e-06, grad_fn=<MseLossBackward0>) tensor([ 0.0006, -0.0015,  0.0020,  0.0034]) tensor([-1.9996,  1.9988,  3.0011, -2.9977], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
97 tensor(8.9158e-06, grad_fn=<MseLossBackward0>) tensor([ 0.0004, -0.0019,  0.0021,  0.0034]) tensor([-1.9996,  1.9988,  3.0011, -2.9978], grad_fn=<CatBackward0>)
torch.Size([50, 1])


In [11]:
parameters_to_vector(model.parameters())

tensor([-1.9996,  1.9990,  3.0009, -2.9981], grad_fn=<CatBackward0>)

In [12]:
class CNN_Net(nn.Module):
    def __init__(self):
        super(CNN_Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5)
        self.pool = nn.MaxPool2d(2,2)
        self.dropout = nn.Dropout(p=0.2)
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.dropout(x)
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        output = self.fc2(x)
        return output

In [13]:
class ClientUpdate(object):
    def __init__(self, dataset, batchSize, alpha, lamda, epochs, projection_list, projected_weights):
        self.train_loader = DataLoader(MyDataset(dataset["features"], dataset["label"]), batch_size=batchSize, shuffle=True)
        #self.learning_rate = learning_rate
        self.epochs = epochs
        self.batchSize = batchSize

    def train(self, model):
        criterion = nn.MSELoss()
        optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, momentum=0.5)

        e_loss = []
        for epoch in range(1, self.epochs+1):
            train_loss = 0
            model.train()
            for i, (data, labels) in zip(range(1), self.train_loader):
                data, labels = data, labels
                optimizer.zero_grad() 
                output = model(data)  
                loss = criterion(output, labels)
                #loss += mu/2 * torch.norm(client_param.data - server_param.data)**2
                loss.backward()
                grads = grads_to_vector(model.parameters())
                #optimizer.step()
                train_loss += loss.item()*data.size(0)
                weights = parameters_to_vector(model.parameters())
                mat_vec_sum = torch.zeros_like(weights)
                for j in G.neighbors(model.user_id):
                    mat_vec_sum = torch.add(mat_vec_sum, torch.matmul(torch.transpose(projection_list[model.user_id][j], 0, 1), 
                                                         projected_weights[j][model.user_id] - projected_weights[model.user_id][j]))
                
                model_update = parameters_to_vector(model.parameters()) - alpha * (grads + lamda * mat_vec_sum)
                
            vector_to_parameters(parameters=model.parameters(), vec=model_update)
                

            train_loss = train_loss/self.batchSize#len(self.train_loader.dataset) 
            e_loss.append(train_loss)

        total_loss = e_loss#sum(e_loss)/len(e_loss)

        return model.state_dict(), total_loss

In [14]:
# Preparing projection matrices
models = [MLP_Net(user_id=i) for i in range(no_users)]
#temp = MLP_Net()
projection_list = []
projected_weights = []

def update_ProjWeight(projection_list, projected_weights, first_run=True):
    #projected_weights = []
    for i in range(no_users):
        neighbors_mat = []
        neighbors_weights = []
        for j in range(no_users):
            if j in G.neighbors(i):
                with torch.no_grad():
                    if first_run == True:
                        row, column = parameters_to_vector(models[j].parameters()).size()[0], parameters_to_vector(models[i].parameters()).size()[0]
                        mat = torch.zeros((row, column))
                        mat.fill_diagonal_(1.0 + 1.0 * float(np.random.randn(1)))
                        neighbors_mat.append(mat)
                        neighbors_weights.append(torch.matmul(mat, parameters_to_vector(models[j].parameters())))
                    else:
                        neighbors_weights.append(torch.matmul(projection_list[j][i], parameters_to_vector(models[j].parameters())))
            else:
                neighbors_mat.append(0)
                neighbors_weights.append(0)
        if first_run == True:
            projection_list.append(neighbors_mat)
        projected_weights.append(neighbors_weights)

update_ProjWeight(projection_list, projected_weights)



In [15]:
print(projection_list[0])

[0, 0, 0, tensor([[0.9900, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.9900, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.9900, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.9900]]), 0, tensor([[0.1102, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.1102, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.1102, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.1102]]), 0, 0, 0, 0, 0, tensor([[2.6985, 0.0000, 0.0000, 0.0000],
        [0.0000, 2.6985, 0.0000, 0.0000],
        [0.0000, 0.0000, 2.6985, 0.0000],
        [0.0000, 0.0000, 0.0000, 2.6985]]), 0, tensor([[1.2667, 0.0000, 0.0000, 0.0000],
        [0.0000, 1.2667, 0.0000, 0.0000],
        [0.0000, 0.0000, 1.2667, 0.0000],
        [0.0000, 0.0000, 0.0000, 1.2667]]), tensor([[1.8165, 0.0000, 0.0000, 0.0000],
        [0.0000, 1.8165, 0.0000, 0.0000],
        [0.0000, 0.0000, 1.8165, 0.0000],
        [0.0000, 0.0000, 0.0000, 1.8165]]), 0, 0, 0, 0, 0]


In [16]:
def testing(model, dataset, bs, criterion): 
    test_loss = 0
    correct = 0
    test_loader = DataLoader(MyDataset(dataset["features"], dataset["label"]), batch_size=bs)
    l = len(test_loader)
    model.eval()
    for data, labels in test_loader:
        data, labels = data, labels
        output = model(data)
        loss = criterion(output, labels)
        test_loss += loss.item()*data.size(0)
        #_, pred = torch.max(output, 1)
        #correct += pred.eq(labels.data.view_as(pred)).sum().item()
    
    test_loss /= len(test_loader.dataset)
    
    return test_loss

In [17]:
def rel_error(model):
    return (torch.norm(parameters_to_vector(model.parameters()) - datapoints[model.user_id]['exact_weights']) / torch.norm(datapoints[model.user_id]['exact_weights'])).detach()

In [18]:
torch.norm(datapoints[model.user_id]['exact_weights'])

tensor(5.0990, dtype=torch.float64)

In [19]:
model = MLP_Net(user_id=0)

from torch.nn.utils import parameters_to_vector, vector_to_parameters

with torch.no_grad():    
    params = parameters_to_vector(model.parameters())

    print(params)

params *= 2.

vector_to_parameters(parameters=model.parameters(), vec=params)

parameters_to_vector(model.parameters())





tensor([-0.0088, -0.1648,  0.3889, -0.4318])


tensor([-0.0175, -0.3297,  0.7778, -0.8636], grad_fn=<CatBackward0>)

In [20]:
#global_model = CNN_Net().cuda()
models = [MLP_Net(user_id=i) for i in range(no_users)]
dummy_models = [MLP_Net(user_id=i) for i in range(no_users)]

#model.load_state_dict(global_model.state_dict())

criterion = nn.MSELoss()


train_loss = []
test_loss = []
test_accuracy = []
total_rel_error = []

for curr_round in tqdm(range(1, it+1)):
    w, local_loss = [], []

    
    for i in range(no_users):
        dummy_models[i].load_state_dict(models[i].state_dict())
        local_update = ClientUpdate(dataset=datapoints[i], batchSize=batch_size, alpha=alpha, lamda=lamda, epochs=1, projection_list=projection_list, projected_weights=projected_weights)
        weights, loss = local_update.train(dummy_models[i])
        w.append(weights)
        local_loss.append(loss)
        models[i].load_state_dict(w[i])
        
    
    
    # Update prjection matrix
    
    #print(projection_list[0], projected_weights[0])
    
    for i in range(no_users):
        weights = parameters_to_vector(models[i].parameters())
        for j in G.neighbors(i):
            mat_vec_sum = torch.zeros_like(weights)
            for k in G.neighbors(i):
                mat_vec_sum = torch.add(mat_vec_sum, projected_weights[k][i] - projected_weights[i][k])
            #print(torch.outer(mat_vec_sum, weights))

            projection_list[i][j] = torch.add(projection_list[i][j], -1 * eta * lamda * mat_vec_sum)
                                         
    projected_weights = []                                          
    update_ProjWeight(projection_list, projected_weights, first_run=False)
        
        
        
    
    




          
            

    local_test_acc = []
    local_test_loss = []
    user_rel_error = 0
    for k in range(no_users):
      
        g_loss = testing(models[i], datapoints[i], 50, criterion)
        local_test_loss.append(g_loss)
        #user_rel_error += rel_error(models[i])
    
    
        

    g_loss = sum(local_test_loss) / len(local_test_loss)
    #total_rel_error.append(user_rel_error / no_users)
    
    

    test_loss.append(g_loss)
    #test_accuracy.append(g_accuracy)
    print("Training_loss %2.5f"% (test_loss[-1]))

  0%|          | 1/2000 [00:00<06:58,  4.77it/s]

Training_loss 28.90711


  0%|          | 2/2000 [00:00<07:44,  4.30it/s]

Training_loss 28.64864


  0%|          | 4/2000 [00:00<06:48,  4.89it/s]

Training_loss 28.51301
Training_loss 28.34899


  0%|          | 6/2000 [00:01<06:06,  5.44it/s]

Training_loss 28.11288
Training_loss 27.99059


  0%|          | 8/2000 [00:01<06:20,  5.24it/s]

Training_loss 27.84978
Training_loss 27.70674


  0%|          | 10/2000 [00:01<05:47,  5.73it/s]

Training_loss 27.58733
Training_loss 27.51044


  1%|          | 12/2000 [00:02<05:54,  5.61it/s]

Training_loss 27.29387
Training_loss 27.08886


  1%|          | 13/2000 [00:02<05:57,  5.56it/s]

Training_loss 26.88996


  1%|          | 15/2000 [00:02<06:36,  5.00it/s]

Training_loss 26.64490
Training_loss 26.56890


  1%|          | 17/2000 [00:03<06:19,  5.23it/s]

Training_loss 26.46695
Training_loss 26.31749


  1%|          | 19/2000 [00:03<06:11,  5.33it/s]

Training_loss 26.14879
Training_loss 25.94793


  1%|          | 21/2000 [00:04<06:19,  5.22it/s]

Training_loss 25.84683
Training_loss 25.72467


  1%|          | 23/2000 [00:04<06:05,  5.41it/s]

Training_loss 25.65223
Training_loss 25.54357


  1%|▏         | 25/2000 [00:04<05:04,  6.49it/s]

Training_loss 25.40166
Training_loss 25.22405


  1%|▏         | 27/2000 [00:04<04:36,  7.14it/s]

Training_loss 25.08563
Training_loss 24.95639


  1%|▏         | 29/2000 [00:05<04:27,  7.37it/s]

Training_loss 24.85738
Training_loss 24.74422


  2%|▏         | 31/2000 [00:05<04:15,  7.71it/s]

Training_loss 24.62424
Training_loss 24.53088


  2%|▏         | 33/2000 [00:05<04:10,  7.85it/s]

Training_loss 24.39414
Training_loss 24.30565


  2%|▏         | 35/2000 [00:05<04:08,  7.92it/s]

Training_loss 24.20326
Training_loss 24.15362


  2%|▏         | 38/2000 [00:06<03:39,  8.95it/s]

Training_loss 24.07376
Training_loss 23.96001
Training_loss 23.80587


  2%|▏         | 40/2000 [00:06<03:38,  8.97it/s]

Training_loss 23.67683
Training_loss 23.58337
Training_loss 23.44365


  2%|▏         | 42/2000 [00:06<03:49,  8.54it/s]

Training_loss 23.33481
Training_loss 23.26607

  2%|▏         | 43/2000 [00:06<04:22,  7.45it/s]




  2%|▏         | 44/2000 [00:07<05:00,  6.51it/s]

Training_loss 23.13974


  2%|▏         | 46/2000 [00:07<05:55,  5.50it/s]

Training_loss 22.97352
Training_loss 22.89854


  2%|▏         | 48/2000 [00:07<06:25,  5.07it/s]

Training_loss 22.82443
Training_loss 22.70879


  2%|▎         | 50/2000 [00:08<06:05,  5.33it/s]

Training_loss 22.54441
Training_loss 22.46970


  3%|▎         | 51/2000 [00:08<06:22,  5.10it/s]

Training_loss 22.38947


  3%|▎         | 52/2000 [00:08<06:34,  4.94it/s]

Training_loss 22.25517


  3%|▎         | 53/2000 [00:09<07:28,  4.34it/s]

Training_loss 22.11452


  3%|▎         | 54/2000 [00:09<08:47,  3.69it/s]

Training_loss 22.00455


  3%|▎         | 55/2000 [00:09<08:56,  3.62it/s]

Training_loss 21.90520


  3%|▎         | 57/2000 [00:10<07:53,  4.10it/s]

Training_loss 21.78024
Training_loss 21.69467


  3%|▎         | 58/2000 [00:10<08:02,  4.03it/s]

Training_loss 21.58638


  3%|▎         | 59/2000 [00:10<08:08,  3.97it/s]

Training_loss 21.52956


  3%|▎         | 60/2000 [00:10<08:31,  3.80it/s]

Training_loss 21.40237


  3%|▎         | 62/2000 [00:11<07:26,  4.34it/s]

Training_loss 21.29776
Training_loss 21.24007


  3%|▎         | 64/2000 [00:11<06:14,  5.17it/s]

Training_loss 21.13531
Training_loss 20.99857


  3%|▎         | 66/2000 [00:11<05:01,  6.41it/s]

Training_loss 20.83385
Training_loss 20.75374


  3%|▎         | 69/2000 [00:12<04:00,  8.04it/s]

Training_loss 20.67642
Training_loss 20.55732
Training_loss 20.45104


  4%|▎         | 71/2000 [00:12<04:12,  7.65it/s]

Training_loss 20.40901
Training_loss 20.30605


  4%|▎         | 73/2000 [00:12<04:17,  7.49it/s]

Training_loss 20.26452
Training_loss 20.17382


  4%|▍         | 75/2000 [00:13<04:05,  7.83it/s]

Training_loss 20.06854
Training_loss 19.98922


  4%|▍         | 78/2000 [00:13<03:42,  8.65it/s]

Training_loss 19.88265
Training_loss 19.78433
Training_loss 19.67178


  4%|▍         | 80/2000 [00:13<03:41,  8.67it/s]

Training_loss 19.60327
Training_loss 19.48937


  4%|▍         | 82/2000 [00:13<03:36,  8.85it/s]

Training_loss 19.44555
Training_loss 19.37438


  4%|▍         | 84/2000 [00:14<04:11,  7.61it/s]

Training_loss 19.30896
Training_loss 19.26996


  4%|▍         | 86/2000 [00:14<03:58,  8.04it/s]

Training_loss 19.18307
Training_loss 19.03806


  4%|▍         | 88/2000 [00:14<03:59,  8.00it/s]

Training_loss 18.96021
Training_loss 18.87364


  4%|▍         | 89/2000 [00:14<05:34,  5.71it/s]

Training_loss 18.75074


  4%|▍         | 90/2000 [00:15<07:48,  4.08it/s]

Training_loss 18.68568


  5%|▍         | 91/2000 [00:15<08:38,  3.68it/s]

Training_loss 18.64665


  5%|▍         | 92/2000 [00:16<10:28,  3.04it/s]

Training_loss 18.48807


  5%|▍         | 93/2000 [00:16<10:44,  2.96it/s]

Training_loss 18.36380


  5%|▍         | 94/2000 [00:16<11:35,  2.74it/s]

Training_loss 18.27380


  5%|▍         | 95/2000 [00:17<12:02,  2.64it/s]

Training_loss 18.21186


  5%|▍         | 96/2000 [00:17<11:58,  2.65it/s]

Training_loss 18.10833


  5%|▍         | 97/2000 [00:18<12:45,  2.48it/s]

Training_loss 18.00207


  5%|▍         | 98/2000 [00:18<13:38,  2.32it/s]

Training_loss 17.91629


  5%|▍         | 99/2000 [00:19<13:25,  2.36it/s]

Training_loss 17.84970


  5%|▌         | 101/2000 [00:19<09:54,  3.19it/s]

Training_loss 17.75535
Training_loss 17.65260


  5%|▌         | 103/2000 [00:19<07:47,  4.06it/s]

Training_loss 17.54498
Training_loss 17.47797


  5%|▌         | 105/2000 [00:20<07:12,  4.38it/s]

Training_loss 17.39758
Training_loss 17.32953


  5%|▌         | 107/2000 [00:20<05:53,  5.36it/s]

Training_loss 17.24422
Training_loss 17.15988


  5%|▌         | 109/2000 [00:20<05:12,  6.05it/s]

Training_loss 17.08197
Training_loss 17.03198


  6%|▌         | 111/2000 [00:21<04:28,  7.05it/s]

Training_loss 16.97452
Training_loss 16.90919


  6%|▌         | 113/2000 [00:21<03:55,  8.01it/s]

Training_loss 16.86042
Training_loss 16.73131


  6%|▌         | 115/2000 [00:21<03:41,  8.52it/s]

Training_loss 16.66533
Training_loss 16.55733


  6%|▌         | 117/2000 [00:21<03:27,  9.06it/s]

Training_loss 16.50461
Training_loss 16.39127


  6%|▌         | 119/2000 [00:21<03:19,  9.44it/s]

Training_loss 16.27428
Training_loss 16.16927


  6%|▌         | 121/2000 [00:22<03:19,  9.41it/s]

Training_loss 16.12574
Training_loss 16.05277


  6%|▌         | 123/2000 [00:22<03:42,  8.43it/s]

Training_loss 15.98918
Training_loss 15.94492


  6%|▋         | 125/2000 [00:22<04:06,  7.61it/s]

Training_loss 15.86626
Training_loss 15.74985


  6%|▋         | 127/2000 [00:22<03:43,  8.40it/s]

Training_loss 15.67688
Training_loss 15.60121
Training_loss 15.51842


  6%|▋         | 130/2000 [00:23<03:38,  8.55it/s]

Training_loss 15.39791
Training_loss 15.33827


  7%|▋         | 132/2000 [00:23<03:32,  8.80it/s]

Training_loss 15.28304
Training_loss 15.22830


  7%|▋         | 134/2000 [00:23<03:35,  8.65it/s]

Training_loss 15.16882
Training_loss 15.08709


  7%|▋         | 136/2000 [00:23<03:56,  7.88it/s]

Training_loss 14.98384
Training_loss 14.93240


  7%|▋         | 139/2000 [00:24<03:26,  9.02it/s]

Training_loss 14.86682
Training_loss 14.77696
Training_loss 14.74188


  7%|▋         | 141/2000 [00:24<03:21,  9.21it/s]

Training_loss 14.64584
Training_loss 14.55343


  7%|▋         | 143/2000 [00:24<03:28,  8.92it/s]

Training_loss 14.50830
Training_loss 14.41165


  7%|▋         | 145/2000 [00:24<03:42,  8.34it/s]

Training_loss 14.34807
Training_loss 14.26316


  7%|▋         | 147/2000 [00:25<03:43,  8.30it/s]

Training_loss 14.19438
Training_loss 14.12923


  8%|▊         | 150/2000 [00:25<03:15,  9.45it/s]

Training_loss 14.06119
Training_loss 13.99649
Training_loss 13.93721


  8%|▊         | 152/2000 [00:25<03:26,  8.94it/s]

Training_loss 13.84874
Training_loss 13.80540


  8%|▊         | 154/2000 [00:26<03:43,  8.24it/s]

Training_loss 13.72870
Training_loss 13.65688


  8%|▊         | 157/2000 [00:26<03:23,  9.05it/s]

Training_loss 13.60405
Training_loss 13.55937
Training_loss 13.46275


  8%|▊         | 159/2000 [00:26<03:18,  9.27it/s]

Training_loss 13.35689
Training_loss 13.27604
Training_loss 13.21911


  8%|▊         | 162/2000 [00:26<03:10,  9.66it/s]

Training_loss 13.16287
Training_loss 13.12387


  8%|▊         | 164/2000 [00:27<03:23,  9.03it/s]

Training_loss 12.99919
Training_loss 12.92637
Training_loss 12.84433


  8%|▊         | 167/2000 [00:27<03:22,  9.04it/s]

Training_loss 12.77418
Training_loss 12.70611


  8%|▊         | 169/2000 [00:27<03:38,  8.39it/s]

Training_loss 12.66852
Training_loss 12.59831


  9%|▊         | 172/2000 [00:27<03:19,  9.18it/s]

Training_loss 12.52644
Training_loss 12.45035
Training_loss 12.40709


  9%|▊         | 174/2000 [00:28<03:12,  9.48it/s]

Training_loss 12.35323
Training_loss 12.28007


  9%|▉         | 177/2000 [00:28<03:06,  9.76it/s]

Training_loss 12.21479
Training_loss 12.15000
Training_loss 12.09097


  9%|▉         | 179/2000 [00:28<03:04,  9.88it/s]

Training_loss 12.06086
Training_loss 11.96480
Training_loss 11.88778


  9%|▉         | 181/2000 [00:28<03:05,  9.81it/s]

Training_loss 11.82032
Training_loss 11.79263


  9%|▉         | 184/2000 [00:29<03:10,  9.54it/s]

Training_loss 11.73533
Training_loss 11.61736
Training_loss 11.56206


  9%|▉         | 186/2000 [00:29<03:01, 10.02it/s]

Training_loss 11.47715
Training_loss 11.41159


 10%|▉         | 190/2000 [00:29<02:54, 10.37it/s]

Training_loss 11.34936
Training_loss 11.30542
Training_loss 11.22578


 10%|▉         | 192/2000 [00:29<02:54, 10.33it/s]

Training_loss 11.16605
Training_loss 11.08306
Training_loss 11.04240


 10%|▉         | 194/2000 [00:30<02:48, 10.71it/s]

Training_loss 11.00370
Training_loss 10.95639


 10%|▉         | 196/2000 [00:30<03:06,  9.65it/s]

Training_loss 10.89788


 10%|▉         | 198/2000 [00:30<04:00,  7.48it/s]

Training_loss 10.85835
Training_loss 10.78202


 10%|█         | 200/2000 [00:31<04:24,  6.82it/s]

Training_loss 10.73393
Training_loss 10.67612


 10%|█         | 202/2000 [00:31<05:21,  5.59it/s]

Training_loss 10.61527
Training_loss 10.56506


 10%|█         | 204/2000 [00:31<05:05,  5.88it/s]

Training_loss 10.53040
Training_loss 10.50101


 10%|█         | 206/2000 [00:32<04:54,  6.08it/s]

Training_loss 10.43663
Training_loss 10.40393


 10%|█         | 208/2000 [00:32<04:54,  6.08it/s]

Training_loss 10.37979
Training_loss 10.32485


 10%|█         | 210/2000 [00:32<05:06,  5.83it/s]

Training_loss 10.26609
Training_loss 10.21766


 11%|█         | 211/2000 [00:33<05:20,  5.59it/s]

Training_loss 10.16298


 11%|█         | 212/2000 [00:33<05:48,  5.13it/s]

Training_loss 10.11289


 11%|█         | 213/2000 [00:33<07:05,  4.20it/s]

Training_loss 10.04042


 11%|█         | 214/2000 [00:33<07:33,  3.94it/s]

Training_loss 9.99387


 11%|█         | 215/2000 [00:34<08:02,  3.70it/s]

Training_loss 9.94142


 11%|█         | 216/2000 [00:34<08:42,  3.41it/s]

Training_loss 9.89383


 11%|█         | 217/2000 [00:34<09:21,  3.18it/s]

Training_loss 9.85622


 11%|█         | 218/2000 [00:35<09:32,  3.11it/s]

Training_loss 9.78389


 11%|█         | 219/2000 [00:35<09:35,  3.10it/s]

Training_loss 9.72787


 11%|█         | 220/2000 [00:36<10:16,  2.89it/s]

Training_loss 9.69123


 11%|█         | 221/2000 [00:36<10:02,  2.96it/s]

Training_loss 9.62858


 11%|█         | 222/2000 [00:36<09:58,  2.97it/s]

Training_loss 9.60296


 11%|█         | 223/2000 [00:36<09:45,  3.04it/s]

Training_loss 9.57554


 11%|█         | 224/2000 [00:37<09:24,  3.15it/s]

Training_loss 9.53379


 11%|█▏        | 225/2000 [00:37<09:11,  3.22it/s]

Training_loss 9.49370


 11%|█▏        | 226/2000 [00:37<08:47,  3.36it/s]

Training_loss 9.45575


 11%|█▏        | 227/2000 [00:38<09:11,  3.22it/s]

Training_loss 9.42016


 11%|█▏        | 228/2000 [00:38<09:30,  3.11it/s]

Training_loss 9.36812


 12%|█▏        | 230/2000 [00:38<07:23,  3.99it/s]

Training_loss 9.32233
Training_loss 9.27023


 12%|█▏        | 231/2000 [00:39<06:57,  4.24it/s]

Training_loss 9.22540


 12%|█▏        | 232/2000 [00:39<07:06,  4.15it/s]

Training_loss 9.18636


 12%|█▏        | 233/2000 [00:39<07:15,  4.06it/s]

Training_loss 9.14085


 12%|█▏        | 234/2000 [00:39<07:34,  3.88it/s]

Training_loss 9.10680


 12%|█▏        | 235/2000 [00:40<07:52,  3.73it/s]

Training_loss 9.06017


 12%|█▏        | 236/2000 [00:40<09:26,  3.11it/s]

Training_loss 9.01062


 12%|█▏        | 237/2000 [00:40<09:20,  3.14it/s]

Training_loss 8.95493


 12%|█▏        | 238/2000 [00:41<09:18,  3.15it/s]

Training_loss 8.91456


 12%|█▏        | 239/2000 [00:41<09:05,  3.23it/s]

Training_loss 8.87746


 12%|█▏        | 240/2000 [00:41<08:42,  3.37it/s]

Training_loss 8.83528


 12%|█▏        | 241/2000 [00:42<08:17,  3.53it/s]

Training_loss 8.79614


 12%|█▏        | 242/2000 [00:42<08:22,  3.50it/s]

Training_loss 8.77308


 12%|█▏        | 243/2000 [00:42<08:29,  3.45it/s]

Training_loss 8.73849


 12%|█▏        | 244/2000 [00:43<08:55,  3.28it/s]

Training_loss 8.67686


 12%|█▏        | 245/2000 [00:43<09:01,  3.24it/s]

Training_loss 8.62735


 12%|█▏        | 246/2000 [00:43<08:53,  3.28it/s]

Training_loss 8.58833


 12%|█▏        | 247/2000 [00:43<08:39,  3.37it/s]

Training_loss 8.54508


 12%|█▏        | 248/2000 [00:44<08:33,  3.41it/s]

Training_loss 8.50402


 12%|█▏        | 249/2000 [00:44<08:31,  3.42it/s]

Training_loss 8.47208


 12%|█▎        | 250/2000 [00:44<08:26,  3.45it/s]

Training_loss 8.42520


 13%|█▎        | 251/2000 [00:45<08:31,  3.42it/s]

Training_loss 8.39485


 13%|█▎        | 252/2000 [00:45<08:59,  3.24it/s]

Training_loss 8.33029


 13%|█▎        | 253/2000 [00:45<08:37,  3.38it/s]

Training_loss 8.29164


 13%|█▎        | 254/2000 [00:45<08:20,  3.49it/s]

Training_loss 8.26211


 13%|█▎        | 255/2000 [00:46<08:30,  3.42it/s]

Training_loss 8.22109


 13%|█▎        | 256/2000 [00:46<09:10,  3.17it/s]

Training_loss 8.17410


 13%|█▎        | 257/2000 [00:47<09:49,  2.96it/s]

Training_loss 8.15788


 13%|█▎        | 258/2000 [00:47<09:50,  2.95it/s]

Training_loss 8.12029


 13%|█▎        | 260/2000 [00:47<07:43,  3.76it/s]

Training_loss 8.08849
Training_loss 8.06413


 13%|█▎        | 262/2000 [00:48<06:09,  4.70it/s]

Training_loss 8.02092
Training_loss 7.97407


 13%|█▎        | 263/2000 [00:48<06:03,  4.78it/s]

Training_loss 7.94049


 13%|█▎        | 264/2000 [00:48<06:27,  4.48it/s]

Training_loss 7.89593


 13%|█▎        | 266/2000 [00:48<06:10,  4.68it/s]

Training_loss 7.85028
Training_loss 7.81656


 13%|█▎        | 267/2000 [00:49<05:57,  4.85it/s]

Training_loss 7.77940


 13%|█▎        | 268/2000 [00:49<06:12,  4.65it/s]

Training_loss 7.74369


 13%|█▎        | 269/2000 [00:49<07:02,  4.09it/s]

Training_loss 7.71186


 14%|█▎        | 270/2000 [00:50<07:31,  3.83it/s]

Training_loss 7.68413


 14%|█▎        | 272/2000 [00:50<06:44,  4.28it/s]

Training_loss 7.63740
Training_loss 7.61080


 14%|█▎        | 274/2000 [00:50<05:55,  4.85it/s]

Training_loss 7.59075
Training_loss 7.56349


 14%|█▍        | 276/2000 [00:51<04:57,  5.80it/s]

Training_loss 7.53155
Training_loss 7.49545


 14%|█▍        | 278/2000 [00:51<04:22,  6.56it/s]

Training_loss 7.44895
Training_loss 7.43109


 14%|█▍        | 280/2000 [00:51<04:14,  6.77it/s]

Training_loss 7.39604
Training_loss 7.37144


 14%|█▍        | 282/2000 [00:51<03:48,  7.53it/s]

Training_loss 7.34187
Training_loss 7.32034


 14%|█▍        | 283/2000 [00:52<04:38,  6.16it/s]

Training_loss 7.28144


 14%|█▍        | 285/2000 [00:52<05:28,  5.23it/s]

Training_loss 7.25735
Training_loss 7.22906


 14%|█▍        | 286/2000 [00:52<05:16,  5.41it/s]

Training_loss 7.21166


 14%|█▍        | 287/2000 [00:52<06:06,  4.67it/s]

Training_loss 7.18016


 14%|█▍        | 288/2000 [00:53<06:16,  4.55it/s]

Training_loss 7.15365


 14%|█▍        | 290/2000 [00:53<06:18,  4.51it/s]

Training_loss 7.12450
Training_loss 7.09852


 15%|█▍        | 292/2000 [00:53<05:15,  5.41it/s]

Training_loss 7.04182
Training_loss 7.02218


 15%|█▍        | 293/2000 [00:54<06:02,  4.71it/s]

Training_loss 6.97705


 15%|█▍        | 294/2000 [00:54<06:15,  4.54it/s]

Training_loss 6.93968


 15%|█▍        | 295/2000 [00:54<06:42,  4.24it/s]

Training_loss 6.88917


 15%|█▍        | 296/2000 [00:54<06:41,  4.25it/s]

Training_loss 6.85497


 15%|█▍        | 297/2000 [00:55<06:37,  4.28it/s]

Training_loss 6.82171


 15%|█▍        | 298/2000 [00:55<06:35,  4.30it/s]

Training_loss 6.79692


 15%|█▌        | 300/2000 [00:55<06:14,  4.54it/s]

Training_loss 6.75289
Training_loss 6.72135


 15%|█▌        | 302/2000 [00:56<05:33,  5.09it/s]

Training_loss 6.68607
Training_loss 6.65024


 15%|█▌        | 304/2000 [00:56<05:04,  5.57it/s]

Training_loss 6.62047
Training_loss 6.58373


 15%|█▌        | 306/2000 [00:56<04:56,  5.70it/s]

Training_loss 6.55908
Training_loss 6.52832


 15%|█▌        | 307/2000 [00:57<05:49,  4.84it/s]

Training_loss 6.48941


 15%|█▌        | 308/2000 [00:57<06:28,  4.36it/s]

Training_loss 6.45379


 16%|█▌        | 310/2000 [00:57<06:18,  4.47it/s]

Training_loss 6.43358
Training_loss 6.40583


 16%|█▌        | 312/2000 [00:58<05:38,  4.99it/s]

Training_loss 6.38245
Training_loss 6.34115


 16%|█▌        | 314/2000 [00:58<05:24,  5.20it/s]

Training_loss 6.29836
Training_loss 6.26572


 16%|█▌        | 316/2000 [00:58<04:47,  5.86it/s]

Training_loss 6.23822
Training_loss 6.20315


 16%|█▌        | 318/2000 [00:59<04:19,  6.48it/s]

Training_loss 6.15705
Training_loss 6.13786


 16%|█▌        | 320/2000 [00:59<04:01,  6.96it/s]

Training_loss 6.10354
Training_loss 6.06439


 16%|█▌        | 322/2000 [00:59<04:02,  6.92it/s]

Training_loss 6.01820
Training_loss 5.98075


 16%|█▌        | 324/2000 [01:00<04:11,  6.65it/s]

Training_loss 5.94244
Training_loss 5.90403


 16%|█▋        | 326/2000 [01:00<04:11,  6.64it/s]

Training_loss 5.86284
Training_loss 5.82221


 16%|█▋        | 328/2000 [01:00<04:16,  6.51it/s]

Training_loss 5.78784
Training_loss 5.75012


 16%|█▋        | 330/2000 [01:01<04:33,  6.10it/s]

Training_loss 5.70503
Training_loss 5.68663


 17%|█▋        | 332/2000 [01:01<05:08,  5.41it/s]

Training_loss 5.65610
Training_loss 5.62285


 17%|█▋        | 333/2000 [01:01<05:22,  5.17it/s]

Training_loss 5.60592


 17%|█▋        | 334/2000 [01:02<07:28,  3.72it/s]

Training_loss 5.59380


 17%|█▋        | 335/2000 [01:02<07:50,  3.54it/s]

Training_loss 5.55506


 17%|█▋        | 336/2000 [01:02<08:37,  3.21it/s]

Training_loss 5.53047


 17%|█▋        | 337/2000 [01:03<09:30,  2.91it/s]

Training_loss 5.51040


 17%|█▋        | 338/2000 [01:03<10:54,  2.54it/s]

Training_loss 5.47992


 17%|█▋        | 339/2000 [01:04<11:40,  2.37it/s]

Training_loss 5.45512


 17%|█▋        | 341/2000 [01:04<08:49,  3.13it/s]

Training_loss 5.43062
Training_loss 5.41114


 17%|█▋        | 343/2000 [01:05<06:38,  4.16it/s]

Training_loss 5.38739
Training_loss 5.35620


 17%|█▋        | 345/2000 [01:05<05:05,  5.42it/s]

Training_loss 5.33801
Training_loss 5.30257


 17%|█▋        | 347/2000 [01:05<04:12,  6.54it/s]

Training_loss 5.28705
Training_loss 5.25983


 17%|█▋        | 349/2000 [01:05<03:44,  7.36it/s]

Training_loss 5.24143
Training_loss 5.20512


 18%|█▊        | 351/2000 [01:06<03:33,  7.72it/s]

Training_loss 5.17660
Training_loss 5.16762


 18%|█▊        | 353/2000 [01:06<03:25,  8.00it/s]

Training_loss 5.14628
Training_loss 5.12374


 18%|█▊        | 355/2000 [01:06<03:30,  7.81it/s]

Training_loss 5.10134
Training_loss 5.08050


 18%|█▊        | 357/2000 [01:06<03:15,  8.39it/s]

Training_loss 5.04974
Training_loss 5.01686


 18%|█▊        | 359/2000 [01:06<02:59,  9.13it/s]

Training_loss 4.99142
Training_loss 4.96772


 18%|█▊        | 361/2000 [01:07<02:57,  9.25it/s]

Training_loss 4.93290
Training_loss 4.91571


 18%|█▊        | 363/2000 [01:07<02:48,  9.70it/s]

Training_loss 4.88386
Training_loss 4.86749
Training_loss 4.84567


 18%|█▊        | 367/2000 [01:07<02:27, 11.08it/s]

Training_loss 4.82089
Training_loss 4.80598
Training_loss 4.78117


 18%|█▊        | 369/2000 [01:07<02:42, 10.01it/s]

Training_loss 4.75319
Training_loss 4.70833


 19%|█▊        | 371/2000 [01:08<03:04,  8.85it/s]

Training_loss 4.68732
Training_loss 4.66814


 19%|█▊        | 373/2000 [01:08<03:19,  8.15it/s]

Training_loss 4.65453
Training_loss 4.63295


 19%|█▉        | 376/2000 [01:08<02:54,  9.33it/s]

Training_loss 4.61748
Training_loss 4.59958
Training_loss 4.57793


 19%|█▉        | 378/2000 [01:08<02:36, 10.39it/s]

Training_loss 4.55878
Training_loss 4.53590
Training_loss 4.51488


 19%|█▉        | 382/2000 [01:09<02:24, 11.16it/s]

Training_loss 4.50264
Training_loss 4.47782
Training_loss 4.46517


 19%|█▉        | 384/2000 [01:09<02:19, 11.59it/s]

Training_loss 4.43813
Training_loss 4.41578
Training_loss 4.39455


 19%|█▉        | 388/2000 [01:09<02:26, 11.04it/s]

Training_loss 4.38033
Training_loss 4.35609
Training_loss 4.33144


 20%|█▉        | 390/2000 [01:09<02:20, 11.45it/s]

Training_loss 4.30863
Training_loss 4.27947
Training_loss 4.26513


 20%|█▉        | 392/2000 [01:10<02:15, 11.85it/s]

Training_loss 4.25205
Training_loss 4.23752


 20%|█▉        | 394/2000 [01:10<02:29, 10.77it/s]

Training_loss 4.22457
Training_loss 4.20333


 20%|█▉        | 397/2000 [01:10<03:10,  8.43it/s]

Training_loss 4.17261
Training_loss 4.15949


 20%|█▉        | 399/2000 [01:11<03:29,  7.66it/s]

Training_loss 4.13257
Training_loss 4.10220


 20%|██        | 401/2000 [01:11<03:35,  7.42it/s]

Training_loss 4.08039
Training_loss 4.05376


 20%|██        | 403/2000 [01:11<03:49,  6.96it/s]

Training_loss 4.04574
Training_loss 4.01952


 20%|██        | 406/2000 [01:12<03:09,  8.42it/s]

Training_loss 4.00748
Training_loss 3.98538
Training_loss 3.96728


 20%|██        | 407/2000 [01:12<03:07,  8.51it/s]

Training_loss 3.95109
Training_loss 3.93671


 20%|██        | 410/2000 [01:12<03:16,  8.09it/s]

Training_loss 3.92049
Training_loss 3.91054


 21%|██        | 412/2000 [01:12<03:06,  8.52it/s]

Training_loss 3.89256
Training_loss 3.88063


 21%|██        | 414/2000 [01:13<03:23,  7.80it/s]

Training_loss 3.86301
Training_loss 3.84531


 21%|██        | 416/2000 [01:13<03:08,  8.41it/s]

Training_loss 3.82923
Training_loss 3.81441


 21%|██        | 419/2000 [01:13<02:53,  9.14it/s]

Training_loss 3.80138
Training_loss 3.78634
Training_loss 3.77253


 21%|██        | 422/2000 [01:13<02:46,  9.49it/s]

Training_loss 3.76223
Training_loss 3.73712
Training_loss 3.71680


 21%|██▏       | 425/2000 [01:14<02:47,  9.43it/s]

Training_loss 3.69900
Training_loss 3.67746
Training_loss 3.65855


 21%|██▏       | 427/2000 [01:14<02:42,  9.67it/s]

Training_loss 3.64528
Training_loss 3.63340


 21%|██▏       | 429/2000 [01:14<02:44,  9.56it/s]

Training_loss 3.61548
Training_loss 3.59653


 22%|██▏       | 431/2000 [01:14<02:57,  8.84it/s]

Training_loss 3.58554
Training_loss 3.57508


 22%|██▏       | 433/2000 [01:15<03:20,  7.83it/s]

Training_loss 3.55467
Training_loss 3.52897


 22%|██▏       | 435/2000 [01:15<03:06,  8.38it/s]

Training_loss 3.51289
Training_loss 3.48904
Training_loss 3.47776


 22%|██▏       | 438/2000 [01:15<02:48,  9.25it/s]

Training_loss 3.45988
Training_loss 3.43622


 22%|██▏       | 440/2000 [01:15<02:55,  8.90it/s]

Training_loss 3.41670
Training_loss 3.40796
Training_loss 3.38046


 22%|██▏       | 443/2000 [01:16<02:42,  9.56it/s]

Training_loss 3.36415
Training_loss 3.34254
Training_loss 3.33338


 22%|██▏       | 447/2000 [01:16<02:35,  9.96it/s]

Training_loss 3.31677
Training_loss 3.29799
Training_loss 3.28570


 22%|██▏       | 449/2000 [01:16<02:33, 10.08it/s]

Training_loss 3.27064
Training_loss 3.25998
Training_loss 3.24212


 23%|██▎       | 452/2000 [01:17<02:35,  9.95it/s]

Training_loss 3.22068
Training_loss 3.20144


 23%|██▎       | 454/2000 [01:17<02:38,  9.77it/s]

Training_loss 3.18414
Training_loss 3.17630
Training_loss 3.15735


 23%|██▎       | 458/2000 [01:17<02:29, 10.29it/s]

Training_loss 3.14446
Training_loss 3.13491
Training_loss 3.12367


 23%|██▎       | 460/2000 [01:17<02:23, 10.70it/s]

Training_loss 3.11316
Training_loss 3.09427
Training_loss 3.08092


 23%|██▎       | 464/2000 [01:18<02:24, 10.64it/s]

Training_loss 3.06682
Training_loss 3.04247
Training_loss 3.03105


 23%|██▎       | 466/2000 [01:18<02:41,  9.48it/s]

Training_loss 3.01908
Training_loss 3.00202


 23%|██▎       | 468/2000 [01:18<02:42,  9.42it/s]

Training_loss 2.99095
Training_loss 2.97774
Training_loss 2.96732


 24%|██▎       | 472/2000 [01:19<02:35,  9.83it/s]

Training_loss 2.94737
Training_loss 2.93757
Training_loss 2.91914


 24%|██▎       | 474/2000 [01:19<02:45,  9.20it/s]

Training_loss 2.90587
Training_loss 2.88656


 24%|██▍       | 476/2000 [01:19<02:39,  9.53it/s]

Training_loss 2.86721
Training_loss 2.85515
Training_loss 2.83718


 24%|██▍       | 479/2000 [01:19<02:36,  9.70it/s]

Training_loss 2.82841
Training_loss 2.80991


 24%|██▍       | 480/2000 [01:19<02:41,  9.41it/s]

Training_loss 2.79887
Training_loss 2.79085


 24%|██▍       | 483/2000 [01:20<02:46,  9.14it/s]

Training_loss 2.78128
Training_loss 2.77116


 24%|██▍       | 485/2000 [01:20<02:45,  9.13it/s]

Training_loss 2.76402
Training_loss 2.75796
Training_loss 2.74595


 24%|██▍       | 488/2000 [01:20<02:41,  9.35it/s]

Training_loss 2.73724
Training_loss 2.72618
Training_loss 2.71494


 25%|██▍       | 491/2000 [01:21<02:42,  9.31it/s]

Training_loss 2.69830
Training_loss 2.68210


 25%|██▍       | 493/2000 [01:21<02:48,  8.95it/s]

Training_loss 2.66509
Training_loss 2.65158


 25%|██▍       | 496/2000 [01:21<02:43,  9.21it/s]

Training_loss 2.64243
Training_loss 2.63378
Training_loss 2.62834


 25%|██▍       | 498/2000 [01:21<02:57,  8.48it/s]

Training_loss 2.61127
Training_loss 2.60216


 25%|██▌       | 501/2000 [01:22<02:49,  8.87it/s]

Training_loss 2.58315
Training_loss 2.56692
Training_loss 2.55698


 25%|██▌       | 503/2000 [01:22<02:33,  9.77it/s]

Training_loss 2.54448
Training_loss 2.53114
Training_loss 2.51789


 25%|██▌       | 505/2000 [01:22<02:24, 10.35it/s]

Training_loss 2.50536
Training_loss 2.49094


 25%|██▌       | 508/2000 [01:23<02:53,  8.59it/s]

Training_loss 2.47381
Training_loss 2.45767


 26%|██▌       | 510/2000 [01:23<03:08,  7.92it/s]

Training_loss 2.45195
Training_loss 2.43951


 26%|██▌       | 512/2000 [01:23<03:29,  7.11it/s]

Training_loss 2.42180
Training_loss 2.41266


 26%|██▌       | 515/2000 [01:23<02:55,  8.45it/s]

Training_loss 2.40288
Training_loss 2.39103
Training_loss 2.38300


 26%|██▌       | 516/2000 [01:24<02:49,  8.75it/s]

Training_loss 2.36379
Training_loss 2.34815


 26%|██▌       | 519/2000 [01:24<03:13,  7.64it/s]

Training_loss 2.34075
Training_loss 2.32416


 26%|██▌       | 521/2000 [01:24<02:52,  8.57it/s]

Training_loss 2.31363
Training_loss 2.30051
Training_loss 2.29247


 26%|██▋       | 525/2000 [01:25<02:29,  9.89it/s]

Training_loss 2.28220
Training_loss 2.27454
Training_loss 2.26342


 26%|██▋       | 527/2000 [01:25<02:19, 10.57it/s]

Training_loss 2.26052
Training_loss 2.25131
Training_loss 2.23847


 27%|██▋       | 531/2000 [01:25<02:12, 11.10it/s]

Training_loss 2.22854
Training_loss 2.21396
Training_loss 2.20511


 27%|██▋       | 533/2000 [01:25<02:12, 11.05it/s]

Training_loss 2.19938
Training_loss 2.19063
Training_loss 2.17972


 27%|██▋       | 537/2000 [01:26<02:11, 11.16it/s]

Training_loss 2.16952
Training_loss 2.15655
Training_loss 2.14838


 27%|██▋       | 539/2000 [01:26<02:06, 11.58it/s]

Training_loss 2.13482
Training_loss 2.12598
Training_loss 2.11278


 27%|██▋       | 543/2000 [01:26<02:07, 11.47it/s]

Training_loss 2.09930
Training_loss 2.09204
Training_loss 2.08366


 27%|██▋       | 545/2000 [01:26<02:08, 11.34it/s]

Training_loss 2.07427
Training_loss 2.06560
Training_loss 2.06037


 27%|██▋       | 547/2000 [01:26<02:18, 10.47it/s]

Training_loss 2.05230


 27%|██▋       | 549/2000 [01:27<03:09,  7.65it/s]

Training_loss 2.04537
Training_loss 2.03967


 28%|██▊       | 551/2000 [01:27<03:49,  6.31it/s]

Training_loss 2.02807
Training_loss 2.01603


 28%|██▊       | 553/2000 [01:28<03:53,  6.20it/s]

Training_loss 2.00253
Training_loss 1.99038


 28%|██▊       | 555/2000 [01:28<03:48,  6.32it/s]

Training_loss 1.98089
Training_loss 1.97135


 28%|██▊       | 557/2000 [01:28<03:42,  6.47it/s]

Training_loss 1.96543
Training_loss 1.95904


 28%|██▊       | 559/2000 [01:29<03:18,  7.28it/s]

Training_loss 1.94665
Training_loss 1.93673


 28%|██▊       | 561/2000 [01:29<03:03,  7.83it/s]

Training_loss 1.92824
Training_loss 1.91839


 28%|██▊       | 563/2000 [01:29<02:57,  8.12it/s]

Training_loss 1.90595
Training_loss 1.89665


 28%|██▊       | 565/2000 [01:29<02:56,  8.11it/s]

Training_loss 1.88894
Training_loss 1.88136


 28%|██▊       | 567/2000 [01:30<03:01,  7.90it/s]

Training_loss 1.86982
Training_loss 1.86188


 28%|██▊       | 569/2000 [01:30<03:06,  7.69it/s]

Training_loss 1.85190
Training_loss 1.83850


 29%|██▊       | 572/2000 [01:30<02:41,  8.85it/s]

Training_loss 1.82938
Training_loss 1.82157
Training_loss 1.81095


 29%|██▊       | 573/2000 [01:30<02:38,  8.99it/s]

Training_loss 1.79953
Training_loss 1.79175


 29%|██▉       | 576/2000 [01:31<02:32,  9.32it/s]

Training_loss 1.78445
Training_loss 1.77284


 29%|██▉       | 578/2000 [01:31<02:41,  8.80it/s]

Training_loss 1.75780
Training_loss 1.75108


 29%|██▉       | 580/2000 [01:31<02:39,  8.88it/s]

Training_loss 1.74016
Training_loss 1.72896


 29%|██▉       | 582/2000 [01:31<02:27,  9.60it/s]

Training_loss 1.71035
Training_loss 1.70449
Training_loss 1.68930


 29%|██▉       | 585/2000 [01:32<02:31,  9.36it/s]

Training_loss 1.67997
Training_loss 1.67469


 29%|██▉       | 587/2000 [01:32<02:48,  8.41it/s]

Training_loss 1.66734
Training_loss 1.65753


 29%|██▉       | 589/2000 [01:32<02:39,  8.85it/s]

Training_loss 1.64863
Training_loss 1.63928


 30%|██▉       | 591/2000 [01:32<02:39,  8.82it/s]

Training_loss 1.63301
Training_loss 1.62592


 30%|██▉       | 593/2000 [01:32<02:42,  8.67it/s]

Training_loss 1.61979
Training_loss 1.61282


 30%|██▉       | 596/2000 [01:33<02:33,  9.13it/s]

Training_loss 1.60537
Training_loss 1.59860
Training_loss 1.59314


 30%|██▉       | 598/2000 [01:33<02:31,  9.24it/s]

Training_loss 1.58510
Training_loss 1.57874


 30%|███       | 601/2000 [01:33<02:25,  9.62it/s]

Training_loss 1.57020
Training_loss 1.56446
Training_loss 1.55712


 30%|███       | 603/2000 [01:33<02:23,  9.73it/s]

Training_loss 1.54870
Training_loss 1.53833


 30%|███       | 604/2000 [01:34<02:23,  9.75it/s]

Training_loss 1.53229
Training_loss 1.52551


 30%|███       | 608/2000 [01:34<02:21,  9.82it/s]

Training_loss 1.52077
Training_loss 1.51443
Training_loss 1.50800


 30%|███       | 610/2000 [01:34<02:16, 10.20it/s]

Training_loss 1.49945
Training_loss 1.49284
Training_loss 1.48636


 31%|███       | 612/2000 [01:34<02:16, 10.20it/s]

Training_loss 1.47785
Training_loss 1.47157


 31%|███       | 616/2000 [01:35<02:22,  9.74it/s]

Training_loss 1.46332
Training_loss 1.45783
Training_loss 1.45000


 31%|███       | 618/2000 [01:35<02:16, 10.13it/s]

Training_loss 1.44539
Training_loss 1.44143
Training_loss 1.43836


 31%|███       | 622/2000 [01:35<02:11, 10.49it/s]

Training_loss 1.42987
Training_loss 1.42307
Training_loss 1.41504


 31%|███       | 624/2000 [01:36<02:26,  9.42it/s]

Training_loss 1.40999
Training_loss 1.40616


 31%|███▏      | 626/2000 [01:36<02:47,  8.21it/s]

Training_loss 1.40457
Training_loss 1.39880


 31%|███▏      | 629/2000 [01:36<02:28,  9.26it/s]

Training_loss 1.39408
Training_loss 1.38807
Training_loss 1.38156


 32%|███▏      | 631/2000 [01:36<02:27,  9.27it/s]

Training_loss 1.37483
Training_loss 1.36990


 32%|███▏      | 633/2000 [01:37<02:48,  8.10it/s]

Training_loss 1.36225
Training_loss 1.35312
Training_loss 1.34581


 32%|███▏      | 636/2000 [01:37<02:30,  9.04it/s]

Training_loss 1.33909
Training_loss 1.33110
Training_loss 1.32424


 32%|███▏      | 639/2000 [01:37<02:38,  8.56it/s]

Training_loss 1.31828
Training_loss 1.31245


 32%|███▏      | 641/2000 [01:38<02:38,  8.56it/s]

Training_loss 1.30197
Training_loss 1.29665


 32%|███▏      | 644/2000 [01:38<02:23,  9.44it/s]

Training_loss 1.29299
Training_loss 1.28407
Training_loss 1.28123


 32%|███▏      | 646/2000 [01:38<02:25,  9.33it/s]

Training_loss 1.27477
Training_loss 1.26438
Training_loss 1.25625


 32%|███▎      | 650/2000 [01:39<02:08, 10.50it/s]

Training_loss 1.24958
Training_loss 1.24290
Training_loss 1.23591


 33%|███▎      | 652/2000 [01:39<02:19,  9.66it/s]

Training_loss 1.23346
Training_loss 1.22941


 33%|███▎      | 654/2000 [01:39<02:19,  9.65it/s]

Training_loss 1.22221
Training_loss 1.21656
Training_loss 1.21185


 33%|███▎      | 656/2000 [01:39<02:13, 10.09it/s]

Training_loss 1.20405
Training_loss 1.19620


 33%|███▎      | 659/2000 [01:39<02:21,  9.46it/s]

Training_loss 1.18977
Training_loss 1.18651


 33%|███▎      | 660/2000 [01:40<02:27,  9.07it/s]

Training_loss 1.18145
Training_loss 1.17396


 33%|███▎      | 663/2000 [01:40<02:26,  9.12it/s]

Training_loss 1.17019
Training_loss 1.16388


 33%|███▎      | 665/2000 [01:40<02:30,  8.88it/s]

Training_loss 1.15662
Training_loss 1.15143


 33%|███▎      | 667/2000 [01:40<02:30,  8.85it/s]

Training_loss 1.14686
Training_loss 1.13857


 33%|███▎      | 669/2000 [01:41<02:27,  9.04it/s]

Training_loss 1.13343
Training_loss 1.12600


 34%|███▎      | 671/2000 [01:41<02:48,  7.89it/s]

Training_loss 1.12187
Training_loss 1.11818


 34%|███▎      | 673/2000 [01:41<02:36,  8.46it/s]

Training_loss 1.11372
Training_loss 1.10703


 34%|███▍      | 676/2000 [01:41<02:20,  9.40it/s]

Training_loss 1.10378
Training_loss 1.09884
Training_loss 1.09369


 34%|███▍      | 678/2000 [01:42<02:19,  9.45it/s]

Training_loss 1.09024
Training_loss 1.08663


 34%|███▍      | 680/2000 [01:42<02:21,  9.32it/s]

Training_loss 1.08159
Training_loss 1.07762


 34%|███▍      | 683/2000 [01:42<02:17,  9.57it/s]

Training_loss 1.07307
Training_loss 1.06783
Training_loss 1.06233


 34%|███▍      | 685/2000 [01:42<02:33,  8.59it/s]

Training_loss 1.05873
Training_loss 1.05102


 34%|███▍      | 687/2000 [01:43<02:27,  8.92it/s]

Training_loss 1.04603
Training_loss 1.03988


 34%|███▍      | 689/2000 [01:43<02:25,  9.00it/s]

Training_loss 1.03416
Training_loss 1.03078


 35%|███▍      | 691/2000 [01:43<02:34,  8.47it/s]

Training_loss 1.02700
Training_loss 1.02419


 35%|███▍      | 692/2000 [01:43<02:35,  8.41it/s]

Training_loss 1.01899
Training_loss 1.01718


 35%|███▍      | 695/2000 [01:44<02:40,  8.15it/s]

Training_loss 1.01308
Training_loss 1.01084


 35%|███▍      | 697/2000 [01:44<02:25,  8.96it/s]

Training_loss 1.00691
Training_loss 1.00358
Training_loss 1.00020


 35%|███▌      | 700/2000 [01:44<02:23,  9.06it/s]

Training_loss 0.99413
Training_loss 0.98941


 35%|███▌      | 702/2000 [01:44<02:29,  8.67it/s]

Training_loss 0.98507
Training_loss 0.98138


 35%|███▌      | 704/2000 [01:45<02:40,  8.06it/s]

Training_loss 0.97723
Training_loss 0.97515


 35%|███▌      | 706/2000 [01:45<02:39,  8.12it/s]

Training_loss 0.97088
Training_loss 0.96849


 35%|███▌      | 708/2000 [01:45<02:29,  8.63it/s]

Training_loss 0.96566
Training_loss 0.96138


 36%|███▌      | 711/2000 [01:45<02:11,  9.81it/s]

Training_loss 0.95456
Training_loss 0.94966
Training_loss 0.94417


 36%|███▌      | 713/2000 [01:46<02:17,  9.33it/s]

Training_loss 0.93912
Training_loss 0.93446


 36%|███▌      | 715/2000 [01:46<02:19,  9.19it/s]

Training_loss 0.93129
Training_loss 0.92775


 36%|███▌      | 717/2000 [01:46<02:26,  8.76it/s]

Training_loss 0.92108
Training_loss 0.91827


 36%|███▌      | 719/2000 [01:46<02:28,  8.65it/s]

Training_loss 0.91240
Training_loss 0.90907
Training_loss 0.90401


 36%|███▌      | 722/2000 [01:47<02:35,  8.21it/s]

Training_loss 0.90107
Training_loss 0.89601


 36%|███▌      | 724/2000 [01:47<02:28,  8.59it/s]

Training_loss 0.88875
Training_loss 0.88501


 36%|███▋      | 726/2000 [01:47<02:25,  8.77it/s]

Training_loss 0.88234
Training_loss 0.87774


 36%|███▋      | 728/2000 [01:47<02:34,  8.25it/s]

Training_loss 0.87429
Training_loss 0.87140


 36%|███▋      | 730/2000 [01:48<02:43,  7.78it/s]

Training_loss 0.86724
Training_loss 0.86612


 37%|███▋      | 732/2000 [01:48<02:30,  8.43it/s]

Training_loss 0.86418
Training_loss 0.85982


 37%|███▋      | 734/2000 [01:48<02:28,  8.54it/s]

Training_loss 0.85846
Training_loss 0.85525


 37%|███▋      | 736/2000 [01:48<02:51,  7.39it/s]

Training_loss 0.85201
Training_loss 0.84833


 37%|███▋      | 738/2000 [01:49<02:41,  7.83it/s]

Training_loss 0.84511
Training_loss 0.84163


 37%|███▋      | 740/2000 [01:49<02:37,  8.03it/s]

Training_loss 0.83508
Training_loss 0.83180


 37%|███▋      | 742/2000 [01:49<02:41,  7.80it/s]

Training_loss 0.82857
Training_loss 0.82440


 37%|███▋      | 744/2000 [01:49<02:25,  8.62it/s]

Training_loss 0.82050
Training_loss 0.81772


 37%|███▋      | 746/2000 [01:50<02:20,  8.93it/s]

Training_loss 0.81373
Training_loss 0.80806


 37%|███▋      | 748/2000 [01:50<02:24,  8.68it/s]

Training_loss 0.80545
Training_loss 0.80375


 38%|███▊      | 750/2000 [01:50<02:20,  8.93it/s]

Training_loss 0.80042
Training_loss 0.79704


 38%|███▊      | 752/2000 [01:50<02:21,  8.84it/s]

Training_loss 0.79377
Training_loss 0.79012


 38%|███▊      | 754/2000 [01:50<02:21,  8.80it/s]

Training_loss 0.78672
Training_loss 0.78356


 38%|███▊      | 756/2000 [01:51<02:09,  9.61it/s]

Training_loss 0.77954
Training_loss 0.77749
Training_loss 0.77117


 38%|███▊      | 759/2000 [01:51<02:12,  9.37it/s]

Training_loss 0.76940
Training_loss 0.76559
Training_loss 0.76286


 38%|███▊      | 761/2000 [01:51<02:08,  9.62it/s]

Training_loss 0.76124
Training_loss 0.75759


 38%|███▊      | 763/2000 [01:51<02:09,  9.52it/s]

Training_loss 0.75295
Training_loss 0.74833


 38%|███▊      | 766/2000 [01:52<02:10,  9.48it/s]

Training_loss 0.74389
Training_loss 0.74005


 38%|███▊      | 769/2000 [01:52<02:08,  9.54it/s]

Training_loss 0.73699
Training_loss 0.73363
Training_loss 0.73091


 39%|███▊      | 772/2000 [01:52<02:04,  9.83it/s]

Training_loss 0.72660
Training_loss 0.72373
Training_loss 0.72056


 39%|███▊      | 774/2000 [01:53<02:07,  9.59it/s]

Training_loss 0.71887
Training_loss 0.71630


 39%|███▉      | 775/2000 [01:53<02:08,  9.52it/s]

Training_loss 0.71431
Training_loss 0.71235


 39%|███▉      | 778/2000 [01:53<02:14,  9.10it/s]

Training_loss 0.70634
Training_loss 0.70225


 39%|███▉      | 780/2000 [01:53<02:18,  8.79it/s]

Training_loss 0.69827
Training_loss 0.69380


 39%|███▉      | 782/2000 [01:53<02:11,  9.23it/s]

Training_loss 0.69139
Training_loss 0.68907


 39%|███▉      | 784/2000 [01:54<02:14,  9.07it/s]

Training_loss 0.68659
Training_loss 0.68354
Training_loss 0.68111


 39%|███▉      | 788/2000 [01:54<01:51, 10.86it/s]

Training_loss 0.67754
Training_loss 0.67476
Training_loss 0.67228


 40%|███▉      | 790/2000 [01:54<01:47, 11.26it/s]

Training_loss 0.66972
Training_loss 0.66720
Training_loss 0.66362


 40%|███▉      | 792/2000 [01:54<01:44, 11.52it/s]

Training_loss 0.66005
Training_loss 0.65858


 40%|███▉      | 794/2000 [01:55<01:53, 10.66it/s]

Training_loss 0.65585
Training_loss 0.65311


 40%|███▉      | 797/2000 [01:55<02:27,  8.14it/s]

Training_loss 0.64724
Training_loss 0.64571


 40%|███▉      | 799/2000 [01:55<02:24,  8.33it/s]

Training_loss 0.64413
Training_loss 0.63906
Training_loss 0.63661


 40%|████      | 803/2000 [01:56<02:06,  9.45it/s]

Training_loss 0.63451
Training_loss 0.63144
Training_loss 0.62709


 40%|████      | 806/2000 [01:56<02:06,  9.47it/s]

Training_loss 0.62480
Training_loss 0.62145
Training_loss 0.61913


 40%|████      | 808/2000 [01:56<02:12,  9.00it/s]

Training_loss 0.61625
Training_loss 0.61234


 40%|████      | 810/2000 [01:56<02:22,  8.37it/s]

Training_loss 0.61028
Training_loss 0.60789


 41%|████      | 812/2000 [01:57<02:41,  7.35it/s]

Training_loss 0.60567
Training_loss 0.60233


 41%|████      | 814/2000 [01:57<02:40,  7.40it/s]

Training_loss 0.60027
Training_loss 0.59713


 41%|████      | 816/2000 [01:57<02:25,  8.15it/s]

Training_loss 0.59397
Training_loss 0.59132


 41%|████      | 817/2000 [01:57<02:24,  8.20it/s]

Training_loss 0.58730
Training_loss 0.58438


 41%|████      | 820/2000 [01:58<02:10,  9.06it/s]

Training_loss 0.58171
Training_loss 0.57595


 41%|████      | 823/2000 [01:58<02:04,  9.49it/s]

Training_loss 0.57406
Training_loss 0.57144
Training_loss 0.56807


 41%|████▏     | 825/2000 [01:58<02:03,  9.51it/s]

Training_loss 0.56656
Training_loss 0.56426


 41%|████▏     | 827/2000 [01:58<02:13,  8.77it/s]

Training_loss 0.56211
Training_loss 0.55982
Training_loss 0.55606


 42%|████▏     | 830/2000 [01:59<02:05,  9.30it/s]

Training_loss 0.55318
Training_loss 0.55142


 42%|████▏     | 832/2000 [01:59<02:09,  9.01it/s]

Training_loss 0.54849
Training_loss 0.54699


 42%|████▏     | 834/2000 [01:59<02:23,  8.12it/s]

Training_loss 0.54489
Training_loss 0.54337


 42%|████▏     | 836/2000 [02:00<02:27,  7.87it/s]

Training_loss 0.54202
Training_loss 0.53922


 42%|████▏     | 839/2000 [02:00<02:20,  8.26it/s]

Training_loss 0.53718
Training_loss 0.53490
Training_loss 0.53266


 42%|████▏     | 841/2000 [02:00<02:42,  7.14it/s]

Training_loss 0.52988
Training_loss 0.52776


 42%|████▏     | 843/2000 [02:01<02:43,  7.08it/s]

Training_loss 0.52659
Training_loss 0.52386


 42%|████▏     | 845/2000 [02:01<02:23,  8.07it/s]

Training_loss 0.52136
Training_loss 0.52041


 42%|████▏     | 847/2000 [02:01<02:28,  7.76it/s]

Training_loss 0.51725
Training_loss 0.51515


 42%|████▎     | 850/2000 [02:01<02:10,  8.81it/s]

Training_loss 0.51300
Training_loss 0.51192
Training_loss 0.50968


 43%|████▎     | 852/2000 [02:02<02:11,  8.70it/s]

Training_loss 0.50791
Training_loss 0.50555


 43%|████▎     | 854/2000 [02:02<02:08,  8.92it/s]

Training_loss 0.50379
Training_loss 0.50239


 43%|████▎     | 856/2000 [02:02<02:06,  9.07it/s]

Training_loss 0.49911
Training_loss 0.49702


 43%|████▎     | 858/2000 [02:02<02:14,  8.49it/s]

Training_loss 0.49482
Training_loss 0.49303


 43%|████▎     | 860/2000 [02:02<02:11,  8.65it/s]

Training_loss 0.49075
Training_loss 0.48827


 43%|████▎     | 862/2000 [02:03<02:25,  7.80it/s]

Training_loss 0.48719
Training_loss 0.48540


 43%|████▎     | 863/2000 [02:03<02:31,  7.52it/s]

Training_loss 0.48285
Training_loss 0.48192


 43%|████▎     | 866/2000 [02:03<02:12,  8.53it/s]

Training_loss 0.47913
Training_loss 0.47762
Training_loss 0.47583


 43%|████▎     | 869/2000 [02:04<01:58,  9.52it/s]

Training_loss 0.47220
Training_loss 0.47109
Training_loss 0.46958


 44%|████▎     | 871/2000 [02:04<01:53,  9.98it/s]

Training_loss 0.46711


 44%|████▎     | 873/2000 [02:04<02:52,  6.52it/s]

Training_loss 0.46524
Training_loss 0.46267


 44%|████▍     | 875/2000 [02:04<02:47,  6.71it/s]

Training_loss 0.46130
Training_loss 0.46018


 44%|████▍     | 877/2000 [02:05<02:31,  7.40it/s]

Training_loss 0.45638
Training_loss 0.45507
Training_loss 0.45332


 44%|████▍     | 881/2000 [02:05<01:55,  9.65it/s]

Training_loss 0.45072
Training_loss 0.44884
Training_loss 0.44642


 44%|████▍     | 883/2000 [02:05<01:52,  9.96it/s]

Training_loss 0.44482
Training_loss 0.44347


 44%|████▍     | 885/2000 [02:06<02:18,  8.05it/s]

Training_loss 0.44116
Training_loss 0.43954


 44%|████▍     | 887/2000 [02:06<02:22,  7.83it/s]

Training_loss 0.43754
Training_loss 0.43556


 44%|████▍     | 889/2000 [02:06<02:29,  7.43it/s]

Training_loss 0.43390
Training_loss 0.43238


 45%|████▍     | 892/2000 [02:06<02:05,  8.85it/s]

Training_loss 0.43116
Training_loss 0.42963
Training_loss 0.42831


 45%|████▍     | 894/2000 [02:07<01:55,  9.59it/s]

Training_loss 0.42617
Training_loss 0.42479
Training_loss 0.42267


 45%|████▍     | 897/2000 [02:07<02:00,  9.14it/s]

Training_loss 0.42109
Training_loss 0.41904


 45%|████▌     | 900/2000 [02:07<01:53,  9.67it/s]

Training_loss 0.41739
Training_loss 0.41579
Training_loss 0.41337


 45%|████▌     | 902/2000 [02:07<01:45, 10.36it/s]

Training_loss 0.41184
Training_loss 0.41028
Training_loss 0.40932


 45%|████▌     | 906/2000 [02:08<01:43, 10.59it/s]

Training_loss 0.40782
Training_loss 0.40576
Training_loss 0.40326


 45%|████▌     | 908/2000 [02:08<01:40, 10.86it/s]

Training_loss 0.40135
Training_loss 0.39858
Training_loss 0.39684


 46%|████▌     | 912/2000 [02:08<01:41, 10.75it/s]

Training_loss 0.39472
Training_loss 0.39343
Training_loss 0.39205


 46%|████▌     | 914/2000 [02:09<01:43, 10.48it/s]

Training_loss 0.38894
Training_loss 0.38724
Training_loss 0.38432


 46%|████▌     | 918/2000 [02:09<01:37, 11.05it/s]

Training_loss 0.38273
Training_loss 0.38079
Training_loss 0.37904


 46%|████▌     | 920/2000 [02:09<01:38, 10.98it/s]

Training_loss 0.37741
Training_loss 0.37581
Training_loss 0.37492


 46%|████▌     | 924/2000 [02:09<01:32, 11.60it/s]

Training_loss 0.37292
Training_loss 0.37081
Training_loss 0.36869


 46%|████▋     | 926/2000 [02:10<01:33, 11.43it/s]

Training_loss 0.36707
Training_loss 0.36596
Training_loss 0.36437


 46%|████▋     | 930/2000 [02:10<01:34, 11.34it/s]

Training_loss 0.36263
Training_loss 0.36053
Training_loss 0.35871


 47%|████▋     | 932/2000 [02:10<01:32, 11.58it/s]

Training_loss 0.35767
Training_loss 0.35650
Training_loss 0.35486


 47%|████▋     | 936/2000 [02:10<01:36, 11.04it/s]

Training_loss 0.35325
Training_loss 0.35196
Training_loss 0.35054


 47%|████▋     | 938/2000 [02:11<01:37, 10.93it/s]

Training_loss 0.34847
Training_loss 0.34710
Training_loss 0.34583


 47%|████▋     | 941/2000 [02:11<02:05,  8.45it/s]

Training_loss 0.34451
Training_loss 0.34363


 47%|████▋     | 944/2000 [02:11<01:59,  8.83it/s]

Training_loss 0.34203
Training_loss 0.34052
Training_loss 0.33927


 47%|████▋     | 946/2000 [02:12<02:09,  8.14it/s]

Training_loss 0.33718
Training_loss 0.33592


 47%|████▋     | 948/2000 [02:12<02:00,  8.69it/s]

Training_loss 0.33471
Training_loss 0.33216
Training_loss 0.33023


 48%|████▊     | 951/2000 [02:12<01:57,  8.91it/s]

Training_loss 0.32772
Training_loss 0.32628


 48%|████▊     | 953/2000 [02:13<02:19,  7.49it/s]

Training_loss 0.32476
Training_loss 0.32354


 48%|████▊     | 955/2000 [02:13<02:25,  7.17it/s]

Training_loss 0.32269
Training_loss 0.32148


 48%|████▊     | 957/2000 [02:13<02:34,  6.73it/s]

Training_loss 0.32024
Training_loss 0.31926


 48%|████▊     | 959/2000 [02:13<02:33,  6.76it/s]

Training_loss 0.31822
Training_loss 0.31687


 48%|████▊     | 961/2000 [02:14<02:46,  6.23it/s]

Training_loss 0.31462
Training_loss 0.31367


 48%|████▊     | 962/2000 [02:14<02:45,  6.29it/s]

Training_loss 0.31157
Training_loss 0.31066


 48%|████▊     | 964/2000 [02:14<02:43,  6.34it/s]

Training_loss 0.30956


 48%|████▊     | 966/2000 [02:15<02:20,  7.38it/s]

Training_loss 0.30773
Training_loss 0.30703


 48%|████▊     | 968/2000 [02:15<02:05,  8.21it/s]

Training_loss 0.30586
Training_loss 0.30514


 48%|████▊     | 970/2000 [02:15<02:08,  8.02it/s]

Training_loss 0.30302
Training_loss 0.30214


 49%|████▊     | 972/2000 [02:15<02:02,  8.38it/s]

Training_loss 0.30088
Training_loss 0.30000


 49%|████▊     | 974/2000 [02:15<02:13,  7.69it/s]

Training_loss 0.29883
Training_loss 0.29706


 49%|████▉     | 976/2000 [02:16<02:16,  7.50it/s]

Training_loss 0.29618
Training_loss 0.29473


 49%|████▉     | 978/2000 [02:16<02:18,  7.39it/s]

Training_loss 0.29387
Training_loss 0.29306


 49%|████▉     | 980/2000 [02:16<02:09,  7.87it/s]

Training_loss 0.29186
Training_loss 0.29041


 49%|████▉     | 982/2000 [02:17<02:06,  8.06it/s]

Training_loss 0.28920
Training_loss 0.28759


 49%|████▉     | 985/2000 [02:17<01:52,  9.04it/s]

Training_loss 0.28688
Training_loss 0.28576
Training_loss 0.28440


 49%|████▉     | 987/2000 [02:17<02:14,  7.56it/s]

Training_loss 0.28329
Training_loss 0.28158


 49%|████▉     | 988/2000 [02:17<02:08,  7.89it/s]

Training_loss 0.28013
Training_loss 0.27856


 50%|████▉     | 990/2000 [02:17<02:01,  8.34it/s]

Training_loss 0.27737


 50%|████▉     | 992/2000 [02:18<02:17,  7.32it/s]

Training_loss 0.27620
Training_loss 0.27526


 50%|████▉     | 995/2000 [02:18<01:57,  8.55it/s]

Training_loss 0.27462
Training_loss 0.27371
Training_loss 0.27291


 50%|████▉     | 998/2000 [02:18<01:47,  9.29it/s]

Training_loss 0.27160
Training_loss 0.27057
Training_loss 0.26976


 50%|████▉     | 999/2000 [02:19<01:48,  9.22it/s]

Training_loss 0.26861
Training_loss 0.26780


 50%|█████     | 1003/2000 [02:19<01:42,  9.75it/s]

Training_loss 0.26652
Training_loss 0.26569
Training_loss 0.26458


 50%|█████     | 1005/2000 [02:19<01:38, 10.07it/s]

Training_loss 0.26319
Training_loss 0.26227
Training_loss 0.26084


 50%|█████     | 1007/2000 [02:19<01:38, 10.10it/s]

Training_loss 0.25973
Training_loss 0.25803


 50%|█████     | 1010/2000 [02:20<01:52,  8.78it/s]

Training_loss 0.25725
Training_loss 0.25595


 51%|█████     | 1012/2000 [02:20<02:29,  6.59it/s]

Training_loss 0.25440
Training_loss 0.25379


 51%|█████     | 1014/2000 [02:20<02:24,  6.81it/s]

Training_loss 0.25256
Training_loss 0.25145
Training_loss 0.25008


 51%|█████     | 1017/2000 [02:21<02:04,  7.89it/s]

Training_loss 0.24882
Training_loss 0.24788
Training_loss 0.24645


 51%|█████     | 1020/2000 [02:21<01:54,  8.53it/s]

Training_loss 0.24535
Training_loss 0.24387


 51%|█████     | 1023/2000 [02:21<01:50,  8.87it/s]

Training_loss 0.24326
Training_loss 0.24247
Training_loss 0.24156


 51%|█████▏    | 1025/2000 [02:22<01:44,  9.29it/s]

Training_loss 0.24095
Training_loss 0.24009


 51%|█████▏    | 1027/2000 [02:22<01:55,  8.44it/s]

Training_loss 0.23857
Training_loss 0.23684


 51%|█████▏    | 1029/2000 [02:22<02:01,  8.01it/s]

Training_loss 0.23598
Training_loss 0.23522


 52%|█████▏    | 1031/2000 [02:22<01:50,  8.81it/s]

Training_loss 0.23442
Training_loss 0.23341
Training_loss 0.23193


 52%|█████▏    | 1034/2000 [02:23<01:44,  9.26it/s]

Training_loss 0.23020
Training_loss 0.22845


 52%|█████▏    | 1036/2000 [02:23<01:59,  8.06it/s]

Training_loss 0.22757
Training_loss 0.22634


 52%|█████▏    | 1038/2000 [02:23<02:03,  7.78it/s]

Training_loss 0.22538
Training_loss 0.22436


 52%|█████▏    | 1040/2000 [02:23<02:02,  7.83it/s]

Training_loss 0.22329
Training_loss 0.22283


 52%|█████▏    | 1042/2000 [02:24<01:53,  8.41it/s]

Training_loss 0.22210
Training_loss 0.22151
Training_loss 0.22049


 52%|█████▏    | 1045/2000 [02:24<01:47,  8.91it/s]

Training_loss 0.21928
Training_loss 0.21816


 52%|█████▏    | 1047/2000 [02:24<01:45,  9.01it/s]

Training_loss 0.21744
Training_loss 0.21694


 52%|█████▏    | 1049/2000 [02:24<01:44,  9.08it/s]

Training_loss 0.21620
Training_loss 0.21498


 53%|█████▎    | 1051/2000 [02:25<01:42,  9.23it/s]

Training_loss 0.21401
Training_loss 0.21317


 53%|█████▎    | 1052/2000 [02:25<01:42,  9.29it/s]

Training_loss 0.21233
Training_loss 0.21111


 53%|█████▎    | 1055/2000 [02:25<02:01,  7.81it/s]

Training_loss 0.21003
Training_loss 0.20949


 53%|█████▎    | 1056/2000 [02:25<01:59,  7.89it/s]

Training_loss 0.20893
Training_loss 0.20768


 53%|█████▎    | 1059/2000 [02:26<01:49,  8.57it/s]

Training_loss 0.20691
Training_loss 0.20631
Training_loss 0.20532


 53%|█████▎    | 1063/2000 [02:26<01:33,  9.99it/s]

Training_loss 0.20441
Training_loss 0.20340
Training_loss 0.20272


 53%|█████▎    | 1065/2000 [02:26<01:28, 10.61it/s]

Training_loss 0.20201
Training_loss 0.20155
Training_loss 0.20087


 53%|█████▎    | 1069/2000 [02:27<01:28, 10.47it/s]

Training_loss 0.19980
Training_loss 0.19885
Training_loss 0.19802


 54%|█████▎    | 1071/2000 [02:27<01:28, 10.45it/s]

Training_loss 0.19711
Training_loss 0.19643


 54%|█████▎    | 1073/2000 [02:27<01:31, 10.13it/s]

Training_loss 0.19595
Training_loss 0.19519


 54%|█████▍    | 1075/2000 [02:27<01:32,  9.95it/s]

Training_loss 0.19428
Training_loss 0.19335


 54%|█████▍    | 1077/2000 [02:27<01:36,  9.55it/s]

Training_loss 0.19264
Training_loss 0.19142
Training_loss 0.19086


 54%|█████▍    | 1080/2000 [02:28<01:42,  9.01it/s]

Training_loss 0.19005
Training_loss 0.18920


 54%|█████▍    | 1082/2000 [02:28<01:42,  8.94it/s]

Training_loss 0.18874
Training_loss 0.18782


 54%|█████▍    | 1083/2000 [02:28<01:56,  7.90it/s]

Training_loss 0.18719


 54%|█████▍    | 1084/2000 [02:28<02:15,  6.74it/s]

Training_loss 0.18670


 54%|█████▍    | 1086/2000 [02:29<02:12,  6.88it/s]

Training_loss 0.18576
Training_loss 0.18522


 54%|█████▍    | 1088/2000 [02:29<02:00,  7.58it/s]

Training_loss 0.18442
Training_loss 0.18344


 55%|█████▍    | 1090/2000 [02:29<01:55,  7.86it/s]

Training_loss 0.18236
Training_loss 0.18168


 55%|█████▍    | 1093/2000 [02:29<01:40,  9.04it/s]

Training_loss 0.18113
Training_loss 0.18015
Training_loss 0.17949


 55%|█████▍    | 1095/2000 [02:30<01:45,  8.55it/s]

Training_loss 0.17898
Training_loss 0.17840


 55%|█████▍    | 1097/2000 [02:30<02:12,  6.80it/s]

Training_loss 0.17797
Training_loss 0.17743


 55%|█████▍    | 1099/2000 [02:30<02:04,  7.25it/s]

Training_loss 0.17642
Training_loss 0.17568


 55%|█████▌    | 1101/2000 [02:31<01:50,  8.17it/s]

Training_loss 0.17467
Training_loss 0.17387


 55%|█████▌    | 1104/2000 [02:31<01:39,  9.01it/s]

Training_loss 0.17336
Training_loss 0.17283
Training_loss 0.17210


 55%|█████▌    | 1107/2000 [02:31<01:35,  9.36it/s]

Training_loss 0.17164
Training_loss 0.17086
Training_loss 0.17016


 55%|█████▌    | 1109/2000 [02:31<01:35,  9.38it/s]

Training_loss 0.16963
Training_loss 0.16896


 56%|█████▌    | 1112/2000 [02:32<01:34,  9.41it/s]

Training_loss 0.16835
Training_loss 0.16800
Training_loss 0.16701


 56%|█████▌    | 1114/2000 [02:32<01:31,  9.72it/s]

Training_loss 0.16640
Training_loss 0.16560
Training_loss 0.16521


 56%|█████▌    | 1117/2000 [02:32<01:33,  9.41it/s]

Training_loss 0.16429
Training_loss 0.16374


 56%|█████▌    | 1119/2000 [02:32<01:34,  9.37it/s]

Training_loss 0.16315
Training_loss 0.16278


 56%|█████▌    | 1121/2000 [02:33<01:53,  7.73it/s]

Training_loss 0.16225
Training_loss 0.16150


 56%|█████▌    | 1123/2000 [02:33<01:56,  7.54it/s]

Training_loss 0.16095
Training_loss 0.15969
Training_loss 0.15919


 56%|█████▋    | 1126/2000 [02:33<01:41,  8.63it/s]

Training_loss 0.15787
Training_loss 0.15731


 56%|█████▋    | 1129/2000 [02:34<01:29,  9.74it/s]

Training_loss 0.15692
Training_loss 0.15644
Training_loss 0.15591


 57%|█████▋    | 1131/2000 [02:34<01:30,  9.63it/s]

Training_loss 0.15549
Training_loss 0.15477


 57%|█████▋    | 1134/2000 [02:34<01:31,  9.49it/s]

Training_loss 0.15425
Training_loss 0.15372
Training_loss 0.15298


 57%|█████▋    | 1136/2000 [02:34<01:37,  8.86it/s]

Training_loss 0.15236
Training_loss 0.15177


 57%|█████▋    | 1137/2000 [02:34<01:35,  9.02it/s]

Training_loss 0.15122
Training_loss 0.15009


 57%|█████▋    | 1140/2000 [02:35<01:40,  8.54it/s]

Training_loss 0.14975
Training_loss 0.14933


 57%|█████▋    | 1142/2000 [02:35<02:01,  7.09it/s]

Training_loss 0.14883
Training_loss 0.14821


 57%|█████▋    | 1144/2000 [02:35<01:47,  7.99it/s]

Training_loss 0.14787
Training_loss 0.14736


 57%|█████▋    | 1147/2000 [02:36<01:37,  8.70it/s]

Training_loss 0.14666
Training_loss 0.14557
Training_loss 0.14476


 57%|█████▋    | 1149/2000 [02:36<01:46,  8.01it/s]

Training_loss 0.14420
Training_loss 0.14352


 58%|█████▊    | 1151/2000 [02:36<01:47,  7.91it/s]

Training_loss 0.14306
Training_loss 0.14236


 58%|█████▊    | 1152/2000 [02:37<02:13,  6.37it/s]

Training_loss 0.14196


 58%|█████▊    | 1154/2000 [02:37<02:26,  5.79it/s]

Training_loss 0.14145
Training_loss 0.14076


 58%|█████▊    | 1156/2000 [02:37<02:01,  6.94it/s]

Training_loss 0.14028
Training_loss 0.13985


 58%|█████▊    | 1159/2000 [02:37<01:35,  8.78it/s]

Training_loss 0.13920
Training_loss 0.13881
Training_loss 0.13842


 58%|█████▊    | 1161/2000 [02:38<01:27,  9.62it/s]

Training_loss 0.13781
Training_loss 0.13720
Training_loss 0.13672


 58%|█████▊    | 1165/2000 [02:38<01:18, 10.62it/s]

Training_loss 0.13607
Training_loss 0.13553
Training_loss 0.13500


 58%|█████▊    | 1167/2000 [02:38<01:18, 10.59it/s]

Training_loss 0.13463
Training_loss 0.13439
Training_loss 0.13360


 59%|█████▊    | 1171/2000 [02:39<01:18, 10.50it/s]

Training_loss 0.13309
Training_loss 0.13285
Training_loss 0.13226


 59%|█████▊    | 1173/2000 [02:39<01:19, 10.42it/s]

Training_loss 0.13155
Training_loss 0.13105
Training_loss 0.13061


 59%|█████▉    | 1177/2000 [02:39<01:16, 10.82it/s]

Training_loss 0.13016
Training_loss 0.12957
Training_loss 0.12900


 59%|█████▉    | 1179/2000 [02:39<01:27,  9.36it/s]

Training_loss 0.12866
Training_loss 0.12820


 59%|█████▉    | 1181/2000 [02:40<01:25,  9.54it/s]

Training_loss 0.12791
Training_loss 0.12761


 59%|█████▉    | 1183/2000 [02:40<01:32,  8.86it/s]

Training_loss 0.12719
Training_loss 0.12659


 59%|█████▉    | 1186/2000 [02:40<01:26,  9.41it/s]

Training_loss 0.12599
Training_loss 0.12561
Training_loss 0.12514


 59%|█████▉    | 1189/2000 [02:40<01:21,  9.95it/s]

Training_loss 0.12480
Training_loss 0.12444
Training_loss 0.12393


 60%|█████▉    | 1191/2000 [02:41<01:23,  9.65it/s]

Training_loss 0.12360
Training_loss 0.12307


 60%|█████▉    | 1193/2000 [02:41<01:21,  9.85it/s]

Training_loss 0.12270
Training_loss 0.12217
Training_loss 0.12168


 60%|█████▉    | 1196/2000 [02:41<01:25,  9.36it/s]

Training_loss 0.12147
Training_loss 0.12087


 60%|█████▉    | 1198/2000 [02:41<01:25,  9.33it/s]

Training_loss 0.12043
Training_loss 0.12007


 60%|██████    | 1201/2000 [02:42<01:25,  9.40it/s]

Training_loss 0.11956
Training_loss 0.11876
Training_loss 0.11819


 60%|██████    | 1203/2000 [02:42<01:17, 10.27it/s]

Training_loss 0.11746
Training_loss 0.11691
Training_loss 0.11624


 60%|██████    | 1207/2000 [02:42<01:12, 10.93it/s]

Training_loss 0.11599
Training_loss 0.11558
Training_loss 0.11530


 60%|██████    | 1209/2000 [02:42<01:13, 10.69it/s]

Training_loss 0.11467
Training_loss 0.11424


 61%|██████    | 1211/2000 [02:43<01:18, 10.02it/s]

Training_loss 0.11377
Training_loss 0.11355


 61%|██████    | 1213/2000 [02:43<01:20,  9.77it/s]

Training_loss 0.11312
Training_loss 0.11253


 61%|██████    | 1215/2000 [02:43<01:23,  9.37it/s]

Training_loss 0.11197
Training_loss 0.11169


 61%|██████    | 1217/2000 [02:43<01:26,  9.07it/s]

Training_loss 0.11140
Training_loss 0.11100


 61%|██████    | 1219/2000 [02:44<01:29,  8.75it/s]

Training_loss 0.11048
Training_loss 0.11007


 61%|██████    | 1221/2000 [02:44<01:33,  8.38it/s]

Training_loss 0.10974
Training_loss 0.10947


 61%|██████    | 1223/2000 [02:44<01:29,  8.73it/s]

Training_loss 0.10899
Training_loss 0.10837


 61%|██████▏   | 1225/2000 [02:44<01:24,  9.12it/s]

Training_loss 0.10785
Training_loss 0.10719
Training_loss 0.10683


 61%|██████▏   | 1228/2000 [02:45<01:20,  9.57it/s]

Training_loss 0.10637
Training_loss 0.10579


 62%|██████▏   | 1231/2000 [02:45<01:21,  9.45it/s]

Training_loss 0.10543
Training_loss 0.10504
Training_loss 0.10447


 62%|██████▏   | 1234/2000 [02:45<01:17,  9.95it/s]

Training_loss 0.10418
Training_loss 0.10387
Training_loss 0.10341


 62%|██████▏   | 1236/2000 [02:45<01:28,  8.59it/s]

Training_loss 0.10311
Training_loss 0.10274


 62%|██████▏   | 1238/2000 [02:46<01:23,  9.16it/s]

Training_loss 0.10241
Training_loss 0.10195


 62%|██████▏   | 1240/2000 [02:46<01:25,  8.84it/s]

Training_loss 0.10148
Training_loss 0.10122


 62%|██████▏   | 1242/2000 [02:46<01:25,  8.87it/s]

Training_loss 0.10091
Training_loss 0.10070


 62%|██████▏   | 1244/2000 [02:46<01:41,  7.41it/s]

Training_loss 0.10039
Training_loss 0.10007


 62%|██████▏   | 1246/2000 [02:47<01:32,  8.14it/s]

Training_loss 0.09967
Training_loss 0.09935


 62%|██████▏   | 1248/2000 [02:47<01:32,  8.13it/s]

Training_loss 0.09921
Training_loss 0.09875
Training_loss 0.09840


 63%|██████▎   | 1252/2000 [02:47<01:14, 10.08it/s]

Training_loss 0.09796
Training_loss 0.09770
Training_loss 0.09738


 63%|██████▎   | 1254/2000 [02:47<01:15,  9.89it/s]

Training_loss 0.09702
Training_loss 0.09676


 63%|██████▎   | 1256/2000 [02:48<01:19,  9.39it/s]

Training_loss 0.09619
Training_loss 0.09575


 63%|██████▎   | 1258/2000 [02:48<01:38,  7.56it/s]

Training_loss 0.09537
Training_loss 0.09487


 63%|██████▎   | 1260/2000 [02:48<01:42,  7.21it/s]

Training_loss 0.09458
Training_loss 0.09430


 63%|██████▎   | 1262/2000 [02:49<01:35,  7.73it/s]

Training_loss 0.09390
Training_loss 0.09373


 63%|██████▎   | 1264/2000 [02:49<01:32,  7.97it/s]

Training_loss 0.09331
Training_loss 0.09261


 63%|██████▎   | 1266/2000 [02:49<01:28,  8.26it/s]

Training_loss 0.09218
Training_loss 0.09179


 63%|██████▎   | 1268/2000 [02:49<01:30,  8.10it/s]

Training_loss 0.09139
Training_loss 0.09105


 64%|██████▎   | 1270/2000 [02:50<01:39,  7.34it/s]

Training_loss 0.09081
Training_loss 0.09043


 64%|██████▎   | 1271/2000 [02:50<02:09,  5.63it/s]

Training_loss 0.09012


 64%|██████▎   | 1272/2000 [02:50<02:51,  4.24it/s]

Training_loss 0.08967


 64%|██████▎   | 1273/2000 [02:50<02:57,  4.09it/s]

Training_loss 0.08929


 64%|██████▍   | 1275/2000 [02:51<02:36,  4.63it/s]

Training_loss 0.08891
Training_loss 0.08862


 64%|██████▍   | 1277/2000 [02:51<02:11,  5.51it/s]

Training_loss 0.08840
Training_loss 0.08812


 64%|██████▍   | 1279/2000 [02:51<01:48,  6.67it/s]

Training_loss 0.08772
Training_loss 0.08744


 64%|██████▍   | 1281/2000 [02:52<01:41,  7.08it/s]

Training_loss 0.08720
Training_loss 0.08707
Training_loss 0.08681


 64%|██████▍   | 1283/2000 [02:52<01:39,  7.23it/s]

Training_loss 0.08626


 64%|██████▍   | 1285/2000 [02:52<01:47,  6.66it/s]

Training_loss 0.08593
Training_loss 0.08567


 64%|██████▍   | 1287/2000 [02:53<01:35,  7.47it/s]

Training_loss 0.08522
Training_loss 0.08507


 64%|██████▍   | 1289/2000 [02:53<01:44,  6.78it/s]

Training_loss 0.08480
Training_loss 0.08438


 65%|██████▍   | 1291/2000 [02:53<01:53,  6.26it/s]

Training_loss 0.08407
Training_loss 0.08376


 65%|██████▍   | 1293/2000 [02:54<01:58,  5.98it/s]

Training_loss 0.08327
Training_loss 0.08307


 65%|██████▍   | 1294/2000 [02:54<02:09,  5.46it/s]

Training_loss 0.08291


 65%|██████▍   | 1295/2000 [02:54<02:31,  4.66it/s]

Training_loss 0.08241


 65%|██████▍   | 1296/2000 [02:54<02:30,  4.68it/s]

Training_loss 0.08205


 65%|██████▍   | 1298/2000 [02:55<02:27,  4.76it/s]

Training_loss 0.08186
Training_loss 0.08153


 65%|██████▌   | 1300/2000 [02:55<02:04,  5.61it/s]

Training_loss 0.08137
Training_loss 0.08097


 65%|██████▌   | 1302/2000 [02:55<01:50,  6.32it/s]

Training_loss 0.08066
Training_loss 0.08041


 65%|██████▌   | 1304/2000 [02:56<01:47,  6.50it/s]

Training_loss 0.08016
Training_loss 0.07996


 65%|██████▌   | 1306/2000 [02:56<01:38,  7.08it/s]

Training_loss 0.07966
Training_loss 0.07938


 65%|██████▌   | 1308/2000 [02:56<01:30,  7.62it/s]

Training_loss 0.07902
Training_loss 0.07892


 66%|██████▌   | 1310/2000 [02:56<01:24,  8.12it/s]

Training_loss 0.07875
Training_loss 0.07839


 66%|██████▌   | 1312/2000 [02:57<01:24,  8.14it/s]

Training_loss 0.07819
Training_loss 0.07791


 66%|██████▌   | 1314/2000 [02:57<01:34,  7.28it/s]

Training_loss 0.07764
Training_loss 0.07742


 66%|██████▌   | 1316/2000 [02:57<01:28,  7.73it/s]

Training_loss 0.07723
Training_loss 0.07700


 66%|██████▌   | 1318/2000 [02:57<01:35,  7.12it/s]

Training_loss 0.07679
Training_loss 0.07657


 66%|██████▌   | 1319/2000 [02:58<01:52,  6.06it/s]

Training_loss 0.07633


 66%|██████▌   | 1321/2000 [02:58<02:29,  4.55it/s]

Training_loss 0.07587
Training_loss 0.07554


 66%|██████▌   | 1323/2000 [02:59<02:15,  4.99it/s]

Training_loss 0.07535
Training_loss 0.07519


 66%|██████▌   | 1324/2000 [02:59<02:05,  5.39it/s]

Training_loss 0.07455


 66%|██████▋   | 1326/2000 [02:59<02:09,  5.19it/s]

Training_loss 0.07426
Training_loss 0.07405


 66%|██████▋   | 1328/2000 [02:59<02:04,  5.41it/s]

Training_loss 0.07390
Training_loss 0.07375


 66%|██████▋   | 1330/2000 [03:00<01:43,  6.45it/s]

Training_loss 0.07349
Training_loss 0.07338


 67%|██████▋   | 1332/2000 [03:00<01:28,  7.52it/s]

Training_loss 0.07318
Training_loss 0.07297


 67%|██████▋   | 1334/2000 [03:00<01:21,  8.13it/s]

Training_loss 0.07268
Training_loss 0.07233


 67%|██████▋   | 1336/2000 [03:00<01:22,  8.06it/s]

Training_loss 0.07198
Training_loss 0.07176


 67%|██████▋   | 1339/2000 [03:01<01:15,  8.71it/s]

Training_loss 0.07152
Training_loss 0.07127
Training_loss 0.07102


 67%|██████▋   | 1341/2000 [03:01<01:12,  9.10it/s]

Training_loss 0.07081
Training_loss 0.07043


 67%|██████▋   | 1343/2000 [03:01<01:23,  7.91it/s]

Training_loss 0.07006
Training_loss 0.06989


 67%|██████▋   | 1345/2000 [03:02<01:28,  7.43it/s]

Training_loss 0.06974
Training_loss 0.06934


 67%|██████▋   | 1347/2000 [03:02<01:19,  8.23it/s]

Training_loss 0.06900
Training_loss 0.06870


 68%|██████▊   | 1350/2000 [03:02<01:06,  9.77it/s]

Training_loss 0.06843
Training_loss 0.06826
Training_loss 0.06794


 68%|██████▊   | 1353/2000 [03:02<01:06,  9.74it/s]

Training_loss 0.06754
Training_loss 0.06734
Training_loss 0.06703


 68%|██████▊   | 1355/2000 [03:03<01:08,  9.35it/s]

Training_loss 0.06674
Training_loss 0.06660


 68%|██████▊   | 1357/2000 [03:03<01:10,  9.16it/s]

Training_loss 0.06645
Training_loss 0.06608


 68%|██████▊   | 1359/2000 [03:03<01:08,  9.30it/s]

Training_loss 0.06581
Training_loss 0.06551


 68%|██████▊   | 1361/2000 [03:03<01:24,  7.52it/s]

Training_loss 0.06532
Training_loss 0.06506


 68%|██████▊   | 1363/2000 [03:04<01:23,  7.64it/s]

Training_loss 0.06481
Training_loss 0.06465


 68%|██████▊   | 1365/2000 [03:04<01:15,  8.43it/s]

Training_loss 0.06446
Training_loss 0.06426
Training_loss 0.06396


 68%|██████▊   | 1368/2000 [03:04<01:15,  8.39it/s]

Training_loss 0.06372
Training_loss 0.06356


 68%|██████▊   | 1370/2000 [03:04<01:31,  6.92it/s]

Training_loss 0.06322
Training_loss 0.06303


 69%|██████▊   | 1372/2000 [03:05<01:46,  5.88it/s]

Training_loss 0.06275
Training_loss 0.06247


 69%|██████▊   | 1374/2000 [03:05<01:37,  6.41it/s]

Training_loss 0.06219
Training_loss 0.06202


 69%|██████▉   | 1376/2000 [03:05<01:28,  7.02it/s]

Training_loss 0.06183
Training_loss 0.06169


 69%|██████▉   | 1378/2000 [03:06<01:23,  7.44it/s]

Training_loss 0.06143
Training_loss 0.06116


 69%|██████▉   | 1380/2000 [03:06<01:19,  7.82it/s]

Training_loss 0.06098
Training_loss 0.06075


 69%|██████▉   | 1382/2000 [03:06<01:10,  8.70it/s]

Training_loss 0.06050
Training_loss 0.06010
Training_loss 0.05983


 69%|██████▉   | 1385/2000 [03:06<01:07,  9.05it/s]

Training_loss 0.05968
Training_loss 0.05937
Training_loss 0.05915


 69%|██████▉   | 1389/2000 [03:07<01:02,  9.71it/s]

Training_loss 0.05893
Training_loss 0.05881
Training_loss 0.05867


 70%|██████▉   | 1391/2000 [03:07<01:03,  9.52it/s]

Training_loss 0.05844
Training_loss 0.05821


 70%|██████▉   | 1393/2000 [03:07<01:08,  8.80it/s]

Training_loss 0.05793
Training_loss 0.05781


 70%|██████▉   | 1395/2000 [03:08<01:07,  9.02it/s]

Training_loss 0.05767
Training_loss 0.05750


 70%|██████▉   | 1398/2000 [03:08<01:01,  9.82it/s]

Training_loss 0.05716
Training_loss 0.05691
Training_loss 0.05678


 70%|███████   | 1401/2000 [03:08<01:00,  9.88it/s]

Training_loss 0.05649
Training_loss 0.05635
Training_loss 0.05617


 70%|███████   | 1403/2000 [03:08<01:03,  9.40it/s]

Training_loss 0.05604
Training_loss 0.05585


 70%|███████   | 1405/2000 [03:09<01:07,  8.82it/s]

Training_loss 0.05549
Training_loss 0.05532


 70%|███████   | 1407/2000 [03:09<01:06,  8.93it/s]

Training_loss 0.05516
Training_loss 0.05495


 70%|███████   | 1408/2000 [03:09<01:06,  8.94it/s]

Training_loss 0.05461
Training_loss 0.05434


 71%|███████   | 1411/2000 [03:09<01:19,  7.43it/s]

Training_loss 0.05395
Training_loss 0.05385


 71%|███████   | 1414/2000 [03:10<01:07,  8.64it/s]

Training_loss 0.05374
Training_loss 0.05349
Training_loss 0.05332


 71%|███████   | 1416/2000 [03:10<01:07,  8.61it/s]

Training_loss 0.05313
Training_loss 0.05300


 71%|███████   | 1419/2000 [03:10<01:04,  9.02it/s]

Training_loss 0.05279
Training_loss 0.05268
Training_loss 0.05247


 71%|███████   | 1421/2000 [03:10<01:04,  9.03it/s]

Training_loss 0.05236
Training_loss 0.05208


 71%|███████   | 1423/2000 [03:11<01:06,  8.66it/s]

Training_loss 0.05189
Training_loss 0.05182


 71%|███████▏  | 1426/2000 [03:11<01:01,  9.28it/s]

Training_loss 0.05164
Training_loss 0.05146
Training_loss 0.05129


 71%|███████▏  | 1428/2000 [03:11<01:02,  9.20it/s]

Training_loss 0.05118
Training_loss 0.05089


 72%|███████▏  | 1430/2000 [03:11<01:01,  9.28it/s]

Training_loss 0.05069
Training_loss 0.05052


 72%|███████▏  | 1432/2000 [03:12<01:01,  9.24it/s]

Training_loss 0.05025
Training_loss 0.05013


 72%|███████▏  | 1435/2000 [03:12<00:56, 10.02it/s]

Training_loss 0.04990
Training_loss 0.04971
Training_loss 0.04953


 72%|███████▏  | 1437/2000 [03:12<00:56, 10.03it/s]

Training_loss 0.04932
Training_loss 0.04912


 72%|███████▏  | 1440/2000 [03:12<00:55, 10.08it/s]

Training_loss 0.04897
Training_loss 0.04880
Training_loss 0.04864


 72%|███████▏  | 1442/2000 [03:13<00:55, 10.11it/s]

Training_loss 0.04851
Training_loss 0.04833


 72%|███████▏  | 1444/2000 [03:13<00:57,  9.68it/s]

Training_loss 0.04819
Training_loss 0.04801


 72%|███████▏  | 1446/2000 [03:13<00:58,  9.54it/s]

Training_loss 0.04783
Training_loss 0.04763


 72%|███████▏  | 1448/2000 [03:13<01:05,  8.37it/s]

Training_loss 0.04747
Training_loss 0.04725


 72%|███████▎  | 1450/2000 [03:14<01:19,  6.95it/s]

Training_loss 0.04698
Training_loss 0.04690


 73%|███████▎  | 1453/2000 [03:14<01:05,  8.39it/s]

Training_loss 0.04669
Training_loss 0.04654
Training_loss 0.04636


 73%|███████▎  | 1455/2000 [03:14<00:58,  9.38it/s]

Training_loss 0.04615
Training_loss 0.04599
Training_loss 0.04586


 73%|███████▎  | 1459/2000 [03:14<00:48, 11.07it/s]

Training_loss 0.04568
Training_loss 0.04549
Training_loss 0.04539


 73%|███████▎  | 1461/2000 [03:15<00:49, 10.92it/s]

Training_loss 0.04529
Training_loss 0.04521
Training_loss 0.04494


 73%|███████▎  | 1465/2000 [03:15<00:51, 10.36it/s]

Training_loss 0.04480
Training_loss 0.04467
Training_loss 0.04450


 73%|███████▎  | 1467/2000 [03:15<00:48, 10.98it/s]

Training_loss 0.04431
Training_loss 0.04407
Training_loss 0.04393


 74%|███████▎  | 1471/2000 [03:16<00:45, 11.69it/s]

Training_loss 0.04375
Training_loss 0.04362
Training_loss 0.04348


 74%|███████▎  | 1473/2000 [03:16<00:45, 11.70it/s]

Training_loss 0.04324
Training_loss 0.04314
Training_loss 0.04295


 74%|███████▍  | 1475/2000 [03:16<00:55,  9.52it/s]

Training_loss 0.04284
Training_loss 0.04269


 74%|███████▍  | 1479/2000 [03:16<00:54,  9.54it/s]

Training_loss 0.04252
Training_loss 0.04233
Training_loss 0.04211


 74%|███████▍  | 1482/2000 [03:17<00:50, 10.16it/s]

Training_loss 0.04197
Training_loss 0.04182
Training_loss 0.04168


 74%|███████▍  | 1484/2000 [03:17<00:47, 10.90it/s]

Training_loss 0.04153
Training_loss 0.04138
Training_loss 0.04115


 74%|███████▍  | 1488/2000 [03:17<00:45, 11.19it/s]

Training_loss 0.04102
Training_loss 0.04090
Training_loss 0.04066


 74%|███████▍  | 1490/2000 [03:17<00:47, 10.64it/s]

Training_loss 0.04049
Training_loss 0.04034


 75%|███████▍  | 1492/2000 [03:18<00:47, 10.69it/s]

Training_loss 0.04026
Training_loss 0.04013
Training_loss 0.04001


 75%|███████▍  | 1496/2000 [03:18<00:44, 11.38it/s]

Training_loss 0.03992
Training_loss 0.03971
Training_loss 0.03963


 75%|███████▍  | 1498/2000 [03:18<00:42, 11.72it/s]

Training_loss 0.03953
Training_loss 0.03946
Training_loss 0.03928


 75%|███████▌  | 1500/2000 [03:18<00:46, 10.74it/s]

Training_loss 0.03917
Training_loss 0.03907


 75%|███████▌  | 1503/2000 [03:19<00:53,  9.30it/s]

Training_loss 0.03887
Training_loss 0.03869


 75%|███████▌  | 1505/2000 [03:19<00:49,  9.96it/s]

Training_loss 0.03854
Training_loss 0.03838
Training_loss 0.03834


 75%|███████▌  | 1507/2000 [03:19<00:46, 10.68it/s]

Training_loss 0.03814
Training_loss 0.03805


 75%|███████▌  | 1509/2000 [03:19<00:49,  9.91it/s]

Training_loss 0.03795
Training_loss 0.03783


 76%|███████▌  | 1512/2000 [03:20<00:54,  8.88it/s]

Training_loss 0.03768
Training_loss 0.03749


 76%|███████▌  | 1514/2000 [03:20<00:57,  8.40it/s]

Training_loss 0.03746
Training_loss 0.03735


 76%|███████▌  | 1517/2000 [03:20<00:54,  8.90it/s]

Training_loss 0.03723
Training_loss 0.03713
Training_loss 0.03689


 76%|███████▌  | 1519/2000 [03:20<00:52,  9.16it/s]

Training_loss 0.03676
Training_loss 0.03660


 76%|███████▌  | 1521/2000 [03:21<01:01,  7.80it/s]

Training_loss 0.03647
Training_loss 0.03636


 76%|███████▌  | 1523/2000 [03:21<01:08,  7.01it/s]

Training_loss 0.03613
Training_loss 0.03604


 76%|███████▋  | 1525/2000 [03:21<01:05,  7.29it/s]

Training_loss 0.03591
Training_loss 0.03579
Training_loss 0.03560


 76%|███████▋  | 1528/2000 [03:22<00:54,  8.64it/s]

Training_loss 0.03552
Training_loss 0.03533
Training_loss 0.03518


 77%|███████▋  | 1532/2000 [03:22<00:47,  9.86it/s]

Training_loss 0.03510
Training_loss 0.03497
Training_loss 0.03491


 77%|███████▋  | 1534/2000 [03:22<00:43, 10.59it/s]

Training_loss 0.03477
Training_loss 0.03463
Training_loss 0.03452


 77%|███████▋  | 1536/2000 [03:22<00:45, 10.24it/s]

Training_loss 0.03438


 77%|███████▋  | 1538/2000 [03:23<00:58,  7.94it/s]

Training_loss 0.03428
Training_loss 0.03417


 77%|███████▋  | 1540/2000 [03:23<00:53,  8.57it/s]

Training_loss 0.03403
Training_loss 0.03397


 77%|███████▋  | 1543/2000 [03:23<00:45, 10.13it/s]

Training_loss 0.03388
Training_loss 0.03379
Training_loss 0.03367


 77%|███████▋  | 1545/2000 [03:23<00:49,  9.21it/s]

Training_loss 0.03357
Training_loss 0.03343


 77%|███████▋  | 1547/2000 [03:24<00:53,  8.42it/s]

Training_loss 0.03327
Training_loss 0.03308


 77%|███████▋  | 1549/2000 [03:24<00:50,  8.96it/s]

Training_loss 0.03296
Training_loss 0.03289
Training_loss 0.03278


 78%|███████▊  | 1553/2000 [03:24<00:42, 10.46it/s]

Training_loss 0.03269
Training_loss 0.03259
Training_loss 0.03241


 78%|███████▊  | 1555/2000 [03:24<00:41, 10.85it/s]

Training_loss 0.03234
Training_loss 0.03225
Training_loss 0.03214


 78%|███████▊  | 1559/2000 [03:25<00:40, 10.86it/s]

Training_loss 0.03200
Training_loss 0.03190
Training_loss 0.03178


 78%|███████▊  | 1561/2000 [03:25<00:41, 10.53it/s]

Training_loss 0.03165
Training_loss 0.03153


 78%|███████▊  | 1563/2000 [03:25<00:44,  9.72it/s]

Training_loss 0.03146
Training_loss 0.03135


 78%|███████▊  | 1564/2000 [03:25<00:52,  8.30it/s]

Training_loss 0.03125


 78%|███████▊  | 1567/2000 [03:26<00:54,  7.98it/s]

Training_loss 0.03114
Training_loss 0.03106
Training_loss 0.03094


 78%|███████▊  | 1570/2000 [03:26<00:47,  9.10it/s]

Training_loss 0.03075
Training_loss 0.03062
Training_loss 0.03055


 79%|███████▊  | 1572/2000 [03:26<00:43,  9.87it/s]

Training_loss 0.03044
Training_loss 0.03028
Training_loss 0.03023


 79%|███████▊  | 1574/2000 [03:26<00:39, 10.85it/s]

Training_loss 0.03010
Training_loss 0.02998


 79%|███████▉  | 1577/2000 [03:27<00:48,  8.67it/s]

Training_loss 0.02986
Training_loss 0.02977


 79%|███████▉  | 1579/2000 [03:27<00:52,  8.01it/s]

Training_loss 0.02964
Training_loss 0.02953


 79%|███████▉  | 1580/2000 [03:27<00:50,  8.40it/s]

Training_loss 0.02947
Training_loss 0.02934


 79%|███████▉  | 1583/2000 [03:28<00:56,  7.40it/s]

Training_loss 0.02924
Training_loss 0.02920


 79%|███████▉  | 1584/2000 [03:28<00:58,  7.09it/s]

Training_loss 0.02916
Training_loss 0.02905


 79%|███████▉  | 1587/2000 [03:28<00:54,  7.62it/s]

Training_loss 0.02896
Training_loss 0.02888


 79%|███████▉  | 1589/2000 [03:29<00:54,  7.50it/s]

Training_loss 0.02876
Training_loss 0.02866


 80%|███████▉  | 1591/2000 [03:29<00:57,  7.05it/s]

Training_loss 0.02850
Training_loss 0.02847


 80%|███████▉  | 1593/2000 [03:29<00:50,  8.09it/s]

Training_loss 0.02838
Training_loss 0.02823


 80%|███████▉  | 1595/2000 [03:29<00:53,  7.58it/s]

Training_loss 0.02814
Training_loss 0.02807


 80%|███████▉  | 1597/2000 [03:30<00:49,  8.19it/s]

Training_loss 0.02800
Training_loss 0.02789


 80%|███████▉  | 1599/2000 [03:30<00:45,  8.78it/s]

Training_loss 0.02780
Training_loss 0.02769
Training_loss 0.02761


 80%|████████  | 1603/2000 [03:30<00:39, 10.16it/s]

Training_loss 0.02751
Training_loss 0.02739
Training_loss 0.02729


 80%|████████  | 1605/2000 [03:30<00:37, 10.51it/s]

Training_loss 0.02720
Training_loss 0.02711
Training_loss 0.02704


 80%|████████  | 1609/2000 [03:31<00:34, 11.20it/s]

Training_loss 0.02694
Training_loss 0.02682
Training_loss 0.02674


 81%|████████  | 1611/2000 [03:31<00:36, 10.77it/s]

Training_loss 0.02671
Training_loss 0.02665
Training_loss 0.02653


 81%|████████  | 1615/2000 [03:31<00:33, 11.45it/s]

Training_loss 0.02639
Training_loss 0.02630
Training_loss 0.02621


 81%|████████  | 1617/2000 [03:31<00:34, 10.98it/s]

Training_loss 0.02611
Training_loss 0.02604
Training_loss 0.02598


 81%|████████  | 1621/2000 [03:32<00:35, 10.81it/s]

Training_loss 0.02593
Training_loss 0.02586
Training_loss 0.02578


 81%|████████  | 1623/2000 [03:32<00:37,  9.99it/s]

Training_loss 0.02569
Training_loss 0.02556


 81%|████████▏ | 1626/2000 [03:32<00:40,  9.30it/s]

Training_loss 0.02545
Training_loss 0.02532
Training_loss 0.02526


 81%|████████▏ | 1629/2000 [03:33<00:36, 10.07it/s]

Training_loss 0.02521
Training_loss 0.02512
Training_loss 0.02503


 82%|████████▏ | 1631/2000 [03:33<00:44,  8.33it/s]

Training_loss 0.02495
Training_loss 0.02490


 82%|████████▏ | 1632/2000 [03:33<00:45,  8.15it/s]

Training_loss 0.02483


 82%|████████▏ | 1634/2000 [03:33<00:55,  6.55it/s]

Training_loss 0.02471
Training_loss 0.02462


 82%|████████▏ | 1637/2000 [03:34<00:47,  7.65it/s]

Training_loss 0.02451
Training_loss 0.02440
Training_loss 0.02436


 82%|████████▏ | 1639/2000 [03:34<00:45,  7.85it/s]

Training_loss 0.02429
Training_loss 0.02416


 82%|████████▏ | 1641/2000 [03:34<00:44,  8.00it/s]

Training_loss 0.02413
Training_loss 0.02405


 82%|████████▏ | 1643/2000 [03:35<00:43,  8.23it/s]

Training_loss 0.02393
Training_loss 0.02381


 82%|████████▏ | 1645/2000 [03:35<00:49,  7.19it/s]

Training_loss 0.02375
Training_loss 0.02368


 82%|████████▏ | 1647/2000 [03:35<00:44,  7.89it/s]

Training_loss 0.02363
Training_loss 0.02355
Training_loss 0.02346


 83%|████████▎ | 1651/2000 [03:35<00:36,  9.48it/s]

Training_loss 0.02337
Training_loss 0.02329
Training_loss 0.02322


 83%|████████▎ | 1653/2000 [03:36<00:33, 10.25it/s]

Training_loss 0.02316
Training_loss 0.02308
Training_loss 0.02303


 83%|████████▎ | 1655/2000 [03:36<00:31, 10.93it/s]

Training_loss 0.02291
Training_loss 0.02280


 83%|████████▎ | 1657/2000 [03:36<00:36,  9.43it/s]

Training_loss 0.02272
Training_loss 0.02265


 83%|████████▎ | 1659/2000 [03:36<00:36,  9.40it/s]

Training_loss 0.02256
Training_loss 0.02249


 83%|████████▎ | 1661/2000 [03:37<00:36,  9.27it/s]

Training_loss 0.02240


 83%|████████▎ | 1663/2000 [03:37<00:41,  8.15it/s]

Training_loss 0.02232
Training_loss 0.02228
Training_loss 0.02222


 83%|████████▎ | 1666/2000 [03:37<00:36,  9.15it/s]

Training_loss 0.02219
Training_loss 0.02213
Training_loss 0.02208


 83%|████████▎ | 1668/2000 [03:37<00:33,  9.85it/s]

Training_loss 0.02200
Training_loss 0.02193


 84%|████████▎ | 1672/2000 [03:38<00:32,  9.96it/s]

Training_loss 0.02185
Training_loss 0.02175
Training_loss 0.02168


 84%|████████▎ | 1674/2000 [03:38<00:31, 10.45it/s]

Training_loss 0.02158
Training_loss 0.02152
Training_loss 0.02147


 84%|████████▍ | 1678/2000 [03:38<00:31, 10.33it/s]

Training_loss 0.02136
Training_loss 0.02130
Training_loss 0.02127


 84%|████████▍ | 1680/2000 [03:39<00:34,  9.21it/s]

Training_loss 0.02125
Training_loss 0.02114


 84%|████████▍ | 1682/2000 [03:39<00:35,  8.97it/s]

Training_loss 0.02108
Training_loss 0.02101


 84%|████████▍ | 1684/2000 [03:39<00:37,  8.48it/s]

Training_loss 0.02097
Training_loss 0.02086


 84%|████████▍ | 1687/2000 [03:39<00:33,  9.43it/s]

Training_loss 0.02078
Training_loss 0.02071
Training_loss 0.02061


 84%|████████▍ | 1689/2000 [03:40<00:32,  9.47it/s]

Training_loss 0.02057
Training_loss 0.02052


 85%|████████▍ | 1691/2000 [03:40<00:33,  9.33it/s]

Training_loss 0.02048
Training_loss 0.02043
Training_loss 0.02037


 85%|████████▍ | 1693/2000 [03:40<00:31,  9.71it/s]

Training_loss 0.02033
Training_loss 0.02025


 85%|████████▍ | 1696/2000 [03:40<00:32,  9.44it/s]

Training_loss 0.02017
Training_loss 0.02009


 85%|████████▍ | 1698/2000 [03:40<00:30,  9.99it/s]

Training_loss 0.02006
Training_loss 0.02001


 85%|████████▌ | 1700/2000 [03:41<00:36,  8.20it/s]

Training_loss 0.01994
Training_loss 0.01987


 85%|████████▌ | 1702/2000 [03:41<00:42,  7.07it/s]

Training_loss 0.01980
Training_loss 0.01974


 85%|████████▌ | 1704/2000 [03:41<00:39,  7.50it/s]

Training_loss 0.01968
Training_loss 0.01964


 85%|████████▌ | 1705/2000 [03:41<00:37,  7.86it/s]

Training_loss 0.01955
Training_loss 0.01946


 85%|████████▌ | 1708/2000 [03:42<00:42,  6.95it/s]

Training_loss 0.01942
Training_loss 0.01935


 86%|████████▌ | 1711/2000 [03:42<00:33,  8.58it/s]

Training_loss 0.01930
Training_loss 0.01922
Training_loss 0.01917


 86%|████████▌ | 1713/2000 [03:42<00:32,  8.86it/s]

Training_loss 0.01912
Training_loss 0.01908


 86%|████████▌ | 1714/2000 [03:43<00:35,  7.99it/s]

Training_loss 0.01902


 86%|████████▌ | 1716/2000 [03:43<00:44,  6.40it/s]

Training_loss 0.01896
Training_loss 0.01891


 86%|████████▌ | 1718/2000 [03:43<00:39,  7.18it/s]

Training_loss 0.01880
Training_loss 0.01871


 86%|████████▌ | 1721/2000 [03:44<00:31,  8.73it/s]

Training_loss 0.01865
Training_loss 0.01857
Training_loss 0.01850


 86%|████████▌ | 1723/2000 [03:44<00:30,  8.99it/s]

Training_loss 0.01843
Training_loss 0.01836


 86%|████████▋ | 1725/2000 [03:44<00:31,  8.66it/s]

Training_loss 0.01829
Training_loss 0.01825


 86%|████████▋ | 1727/2000 [03:44<00:33,  8.27it/s]

Training_loss 0.01822
Training_loss 0.01816


 86%|████████▋ | 1730/2000 [03:45<00:28,  9.53it/s]

Training_loss 0.01811
Training_loss 0.01807
Training_loss 0.01803


 87%|████████▋ | 1732/2000 [03:45<00:25, 10.48it/s]

Training_loss 0.01800
Training_loss 0.01795
Training_loss 0.01792


 87%|████████▋ | 1736/2000 [03:45<00:26,  9.91it/s]

Training_loss 0.01786
Training_loss 0.01782
Training_loss 0.01779


 87%|████████▋ | 1739/2000 [03:45<00:26,  9.75it/s]

Training_loss 0.01772
Training_loss 0.01767
Training_loss 0.01764


 87%|████████▋ | 1742/2000 [03:46<00:25, 10.28it/s]

Training_loss 0.01758
Training_loss 0.01753
Training_loss 0.01748


 87%|████████▋ | 1744/2000 [03:46<00:23, 11.01it/s]

Training_loss 0.01741
Training_loss 0.01735
Training_loss 0.01730


 87%|████████▋ | 1748/2000 [03:46<00:20, 12.04it/s]

Training_loss 0.01725
Training_loss 0.01719
Training_loss 0.01717


 88%|████████▊ | 1750/2000 [03:46<00:20, 12.14it/s]

Training_loss 0.01713
Training_loss 0.01708
Training_loss 0.01703


 88%|████████▊ | 1754/2000 [03:47<00:20, 12.13it/s]

Training_loss 0.01697
Training_loss 0.01689
Training_loss 0.01683


 88%|████████▊ | 1756/2000 [03:47<00:20, 11.78it/s]

Training_loss 0.01679
Training_loss 0.01673
Training_loss 0.01667


 88%|████████▊ | 1760/2000 [03:47<00:21, 11.21it/s]

Training_loss 0.01661
Training_loss 0.01656
Training_loss 0.01649


 88%|████████▊ | 1762/2000 [03:47<00:21, 11.05it/s]

Training_loss 0.01642
Training_loss 0.01637
Training_loss 0.01635


 88%|████████▊ | 1764/2000 [03:48<00:20, 11.51it/s]

Training_loss 0.01632
Training_loss 0.01628


 88%|████████▊ | 1768/2000 [03:48<00:21, 10.81it/s]

Training_loss 0.01622
Training_loss 0.01617
Training_loss 0.01613


 88%|████████▊ | 1770/2000 [03:48<00:22, 10.42it/s]

Training_loss 0.01608
Training_loss 0.01605
Training_loss 0.01600


 89%|████████▊ | 1774/2000 [03:49<00:21, 10.43it/s]

Training_loss 0.01595
Training_loss 0.01591
Training_loss 0.01587


 89%|████████▉ | 1776/2000 [03:49<00:22,  9.92it/s]

Training_loss 0.01578
Training_loss 0.01569


 89%|████████▉ | 1778/2000 [03:49<00:21, 10.18it/s]

Training_loss 0.01563
Training_loss 0.01559
Training_loss 0.01555


 89%|████████▉ | 1782/2000 [03:49<00:19, 10.93it/s]

Training_loss 0.01549
Training_loss 0.01545
Training_loss 0.01538


 89%|████████▉ | 1784/2000 [03:49<00:18, 11.45it/s]

Training_loss 0.01533
Training_loss 0.01527
Training_loss 0.01525


 89%|████████▉ | 1788/2000 [03:50<00:18, 11.19it/s]

Training_loss 0.01518
Training_loss 0.01514
Training_loss 0.01510


 90%|████████▉ | 1790/2000 [03:50<00:19, 10.74it/s]

Training_loss 0.01504
Training_loss 0.01498


 90%|████████▉ | 1792/2000 [03:50<00:21,  9.65it/s]

Training_loss 0.01492
Training_loss 0.01488


 90%|████████▉ | 1794/2000 [03:50<00:20, 10.27it/s]

Training_loss 0.01485
Training_loss 0.01482
Training_loss 0.01476


 90%|████████▉ | 1796/2000 [03:51<00:18, 10.90it/s]

Training_loss 0.01471
Training_loss 0.01468


 90%|█████████ | 1800/2000 [03:51<00:19, 10.27it/s]

Training_loss 0.01464
Training_loss 0.01460
Training_loss 0.01453


 90%|█████████ | 1802/2000 [03:51<00:18, 10.86it/s]

Training_loss 0.01450
Training_loss 0.01443


 90%|█████████ | 1804/2000 [03:51<00:21,  9.13it/s]

Training_loss 0.01440
Training_loss 0.01434
Training_loss 0.01430


 90%|█████████ | 1808/2000 [03:52<00:17, 10.99it/s]

Training_loss 0.01426
Training_loss 0.01422
Training_loss 0.01419


 90%|█████████ | 1810/2000 [03:52<00:16, 11.43it/s]

Training_loss 0.01414
Training_loss 0.01409
Training_loss 0.01403


 91%|█████████ | 1812/2000 [03:52<00:16, 11.71it/s]

Training_loss 0.01398
Training_loss 0.01392


 91%|█████████ | 1814/2000 [03:52<00:17, 10.50it/s]

Training_loss 0.01388
Training_loss 0.01386


 91%|█████████ | 1818/2000 [03:53<00:16, 10.80it/s]

Training_loss 0.01379
Training_loss 0.01373
Training_loss 0.01368


 91%|█████████ | 1820/2000 [03:53<00:17, 10.36it/s]

Training_loss 0.01365
Training_loss 0.01361


 91%|█████████ | 1822/2000 [03:53<00:16, 10.62it/s]

Training_loss 0.01354
Training_loss 0.01350
Training_loss 0.01349


 91%|█████████▏| 1826/2000 [03:53<00:15, 11.50it/s]

Training_loss 0.01343
Training_loss 0.01340
Training_loss 0.01337


 91%|█████████▏| 1828/2000 [03:54<00:15, 11.02it/s]

Training_loss 0.01332
Training_loss 0.01329


 92%|█████████▏| 1830/2000 [03:54<00:17,  9.90it/s]

Training_loss 0.01326
Training_loss 0.01323
Training_loss 0.01318


 92%|█████████▏| 1832/2000 [03:54<00:16, 10.21it/s]

Training_loss 0.01315
Training_loss 0.01312


 92%|█████████▏| 1835/2000 [03:54<00:20,  8.21it/s]

Training_loss 0.01307
Training_loss 0.01303


 92%|█████████▏| 1837/2000 [03:55<00:18,  8.99it/s]

Training_loss 0.01297
Training_loss 0.01294


 92%|█████████▏| 1838/2000 [03:55<00:19,  8.31it/s]

Training_loss 0.01292


 92%|█████████▏| 1840/2000 [03:55<00:23,  6.91it/s]

Training_loss 0.01289
Training_loss 0.01282


 92%|█████████▏| 1842/2000 [03:56<00:24,  6.55it/s]

Training_loss 0.01279
Training_loss 0.01275


 92%|█████████▏| 1845/2000 [03:56<00:18,  8.56it/s]

Training_loss 0.01271
Training_loss 0.01268
Training_loss 0.01264


 92%|█████████▏| 1847/2000 [03:56<00:15,  9.63it/s]

Training_loss 0.01262
Training_loss 0.01260
Training_loss 0.01257


 93%|█████████▎| 1851/2000 [03:56<00:14, 10.58it/s]

Training_loss 0.01255
Training_loss 0.01249
Training_loss 0.01243


 93%|█████████▎| 1853/2000 [03:56<00:13, 11.20it/s]

Training_loss 0.01239
Training_loss 0.01235
Training_loss 0.01233


 93%|█████████▎| 1857/2000 [03:57<00:12, 11.63it/s]

Training_loss 0.01227
Training_loss 0.01224
Training_loss 0.01220


 93%|█████████▎| 1859/2000 [03:57<00:11, 12.14it/s]

Training_loss 0.01216
Training_loss 0.01211
Training_loss 0.01209


 93%|█████████▎| 1863/2000 [03:57<00:11, 12.23it/s]

Training_loss 0.01206
Training_loss 0.01203
Training_loss 0.01197


 93%|█████████▎| 1865/2000 [03:57<00:10, 12.46it/s]

Training_loss 0.01194
Training_loss 0.01189
Training_loss 0.01185


 93%|█████████▎| 1867/2000 [03:58<00:11, 11.94it/s]

Training_loss 0.01182
Training_loss 0.01177


 94%|█████████▎| 1871/2000 [03:58<00:11, 10.97it/s]

Training_loss 0.01175
Training_loss 0.01170
Training_loss 0.01168


 94%|█████████▎| 1873/2000 [03:58<00:11, 10.72it/s]

Training_loss 0.01166
Training_loss 0.01162


 94%|█████████▍| 1875/2000 [03:59<00:14,  8.46it/s]

Training_loss 0.01155
Training_loss 0.01150


 94%|█████████▍| 1877/2000 [03:59<00:13,  9.17it/s]

Training_loss 0.01146
Training_loss 0.01145
Training_loss 0.01141


 94%|█████████▍| 1879/2000 [03:59<00:12,  9.42it/s]

Training_loss 0.01138
Training_loss 0.01135


 94%|█████████▍| 1882/2000 [03:59<00:15,  7.70it/s]

Training_loss 0.01133
Training_loss 0.01127


 94%|█████████▍| 1885/2000 [04:00<00:12,  9.10it/s]

Training_loss 0.01122
Training_loss 0.01120
Training_loss 0.01115


 94%|█████████▍| 1887/2000 [04:00<00:11,  9.59it/s]

Training_loss 0.01111
Training_loss 0.01108
Training_loss 0.01104


 95%|█████████▍| 1891/2000 [04:00<00:10, 10.65it/s]

Training_loss 0.01102
Training_loss 0.01097
Training_loss 0.01094


 95%|█████████▍| 1893/2000 [04:01<00:11,  9.67it/s]

Training_loss 0.01091
Training_loss 0.01089


 95%|█████████▍| 1895/2000 [04:01<00:10, 10.11it/s]

Training_loss 0.01082
Training_loss 0.01078


 95%|█████████▍| 1897/2000 [04:01<00:10,  9.96it/s]

Training_loss 0.01074
Training_loss 0.01069
Training_loss 0.01066


 95%|█████████▌| 1901/2000 [04:01<00:10,  9.72it/s]

Training_loss 0.01061
Training_loss 0.01059
Training_loss 0.01056


 95%|█████████▌| 1903/2000 [04:02<00:09,  9.97it/s]

Training_loss 0.01052
Training_loss 0.01050


 95%|█████████▌| 1905/2000 [04:02<00:09,  9.98it/s]

Training_loss 0.01048
Training_loss 0.01046
Training_loss 0.01045


 95%|█████████▌| 1909/2000 [04:02<00:08, 10.64it/s]

Training_loss 0.01042
Training_loss 0.01039
Training_loss 0.01036


 96%|█████████▌| 1911/2000 [04:02<00:07, 11.18it/s]

Training_loss 0.01033
Training_loss 0.01031
Training_loss 0.01028


 96%|█████████▌| 1915/2000 [04:03<00:07, 11.41it/s]

Training_loss 0.01027
Training_loss 0.01024
Training_loss 0.01021


 96%|█████████▌| 1917/2000 [04:03<00:07, 11.42it/s]

Training_loss 0.01019
Training_loss 0.01015
Training_loss 0.01011


 96%|█████████▌| 1919/2000 [04:03<00:07, 11.38it/s]

Training_loss 0.01008
Training_loss 0.01005


 96%|█████████▌| 1921/2000 [04:03<00:08,  9.74it/s]

Training_loss 0.01003
Training_loss 0.01001


 96%|█████████▌| 1924/2000 [04:04<00:08,  9.34it/s]

Training_loss 0.00998
Training_loss 0.00996


 96%|█████████▋| 1926/2000 [04:04<00:09,  7.90it/s]

Training_loss 0.00994
Training_loss 0.00990


 96%|█████████▋| 1928/2000 [04:04<00:08,  8.62it/s]

Training_loss 0.00986
Training_loss 0.00983


 96%|█████████▋| 1930/2000 [04:04<00:08,  8.58it/s]

Training_loss 0.00980
Training_loss 0.00978
Training_loss 0.00975


 97%|█████████▋| 1934/2000 [04:05<00:06, 10.34it/s]

Training_loss 0.00972
Training_loss 0.00969
Training_loss 0.00966


 97%|█████████▋| 1936/2000 [04:05<00:05, 10.78it/s]

Training_loss 0.00965
Training_loss 0.00962
Training_loss 0.00959


 97%|█████████▋| 1938/2000 [04:05<00:06, 10.28it/s]

Training_loss 0.00956
Training_loss 0.00951


 97%|█████████▋| 1942/2000 [04:06<00:06,  9.40it/s]

Training_loss 0.00949
Training_loss 0.00946
Training_loss 0.00940


 97%|█████████▋| 1944/2000 [04:06<00:05,  9.85it/s]

Training_loss 0.00937
Training_loss 0.00934
Training_loss 0.00930


 97%|█████████▋| 1948/2000 [04:06<00:05, 10.28it/s]

Training_loss 0.00927
Training_loss 0.00925
Training_loss 0.00923


 98%|█████████▊| 1950/2000 [04:06<00:04, 10.61it/s]

Training_loss 0.00920
Training_loss 0.00918
Training_loss 0.00916


 98%|█████████▊| 1952/2000 [04:06<00:04, 10.38it/s]

Training_loss 0.00914
Training_loss 0.00911


 98%|█████████▊| 1956/2000 [04:07<00:04, 10.35it/s]

Training_loss 0.00909
Training_loss 0.00906
Training_loss 0.00904


 98%|█████████▊| 1958/2000 [04:07<00:04, 10.20it/s]

Training_loss 0.00900
Training_loss 0.00897


 98%|█████████▊| 1960/2000 [04:07<00:03, 10.84it/s]

Training_loss 0.00894
Training_loss 0.00892
Training_loss 0.00890


 98%|█████████▊| 1964/2000 [04:08<00:03, 11.17it/s]

Training_loss 0.00889
Training_loss 0.00884
Training_loss 0.00883


 98%|█████████▊| 1966/2000 [04:08<00:02, 11.55it/s]

Training_loss 0.00881
Training_loss 0.00879


 98%|█████████▊| 1968/2000 [04:08<00:03,  9.78it/s]

Training_loss 0.00876
Training_loss 0.00872
Training_loss 0.00870


 98%|█████████▊| 1970/2000 [04:08<00:02, 10.38it/s]

Training_loss 0.00868
Training_loss 0.00865


 99%|█████████▊| 1973/2000 [04:09<00:03,  8.65it/s]

Training_loss 0.00862
Training_loss 0.00856
Training_loss 0.00853


 99%|█████████▉| 1977/2000 [04:09<00:02,  9.72it/s]

Training_loss 0.00849
Training_loss 0.00845
Training_loss 0.00842


 99%|█████████▉| 1979/2000 [04:09<00:01, 10.54it/s]

Training_loss 0.00838
Training_loss 0.00835
Training_loss 0.00833


 99%|█████████▉| 1981/2000 [04:09<00:01, 10.87it/s]

Training_loss 0.00831
Training_loss 0.00828


 99%|█████████▉| 1985/2000 [04:10<00:01, 10.38it/s]

Training_loss 0.00825
Training_loss 0.00825
Training_loss 0.00821


 99%|█████████▉| 1987/2000 [04:10<00:01,  9.94it/s]

Training_loss 0.00819
Training_loss 0.00817
Training_loss 0.00814


100%|█████████▉| 1991/2000 [04:10<00:00, 11.25it/s]

Training_loss 0.00812
Training_loss 0.00810
Training_loss 0.00809


100%|█████████▉| 1993/2000 [04:10<00:00, 11.53it/s]

Training_loss 0.00806
Training_loss 0.00804
Training_loss 0.00802


100%|█████████▉| 1997/2000 [04:11<00:00, 11.37it/s]

Training_loss 0.00798
Training_loss 0.00796
Training_loss 0.00795


100%|█████████▉| 1999/2000 [04:11<00:00, 10.93it/s]

Training_loss 0.00792
Training_loss 0.00790


100%|██████████| 2000/2000 [04:11<00:00,  7.95it/s]

Training_loss 0.00786





In [21]:
#plot.plot(test_loss)
parameters_to_vector(models[19].parameters())

tensor([-1.9730,  1.9617,  3.0086, -2.9190], grad_fn=<CatBackward0>)

In [22]:
for j in G.neighbors(0):
    print(j)

3
5
11
13
14


In [23]:
parameters_to_vector(models[0].parameters())

tensor([1.9362, 1.9564, 2.9383, 2.9599], grad_fn=<CatBackward0>)

In [24]:
projection_list[0]

[0,
 0,
 0,
 tensor([[ 0.9609,  0.0036,  0.0077, -0.0459],
         [-0.0291,  0.9936,  0.0077, -0.0459],
         [-0.0291,  0.0036,  0.9977, -0.0459],
         [-0.0291,  0.0036,  0.0077,  0.9441]]),
 0,
 tensor([[ 0.0811,  0.0036,  0.0077, -0.0459],
         [-0.0291,  0.1138,  0.0077, -0.0459],
         [-0.0291,  0.0036,  0.1179, -0.0459],
         [-0.0291,  0.0036,  0.0077,  0.0643]]),
 0,
 0,
 0,
 0,
 0,
 tensor([[ 2.6695,  0.0036,  0.0077, -0.0459],
         [-0.0291,  2.7022,  0.0077, -0.0459],
         [-0.0291,  0.0036,  2.7063, -0.0459],
         [-0.0291,  0.0036,  0.0077,  2.6526]]),
 0,
 tensor([[ 1.2376,  0.0036,  0.0077, -0.0459],
         [-0.0291,  1.2703,  0.0077, -0.0459],
         [-0.0291,  0.0036,  1.2744, -0.0459],
         [-0.0291,  0.0036,  0.0077,  1.2208]]),
 tensor([[ 1.7874,  0.0036,  0.0077, -0.0459],
         [-0.0291,  1.8201,  0.0077, -0.0459],
         [-0.0291,  0.0036,  1.8241, -0.0459],
         [-0.0291,  0.0036,  0.0077,  1.7705]]),
 0,
 0,
 0

In [25]:
projected_weights[0]

[0,
 0,
 0,
 tensor([2.4637, 2.4207, 3.7774, 3.8030]),
 0,
 tensor([2.3529, 2.3177, 3.6755, 3.6197]),
 0,
 0,
 0,
 0,
 0,
 tensor([-2.3166,  1.5638,  2.5005, -3.2639]),
 0,
 tensor([ -6.8826,   6.3495,   9.7412, -10.2596]),
 tensor([-2.4359,  2.2153,  3.3876, -3.5679]),
 0,
 0,
 0,
 0,
 0]

In [26]:
test_loss = np.array(test_loss)
total_rel_error = np.array(total_rel_error)

In [27]:
print(test_loss)

[2.89071121e+01 2.86486397e+01 2.85130081e+01 ... 7.92254077e-03
 7.89543171e-03 7.85896205e-03]


In [28]:
np.save( 'training_loss_sheave_fml' + str(lamda).replace('.', '_'), test_loss)
#np.save('relative_error_sheave_fml' + str(lamda).replace('.', '_'), total_rel_error)