In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Dataset
import copy
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import networkx as nx
from torch.nn.utils import parameters_to_vector, vector_to_parameters

In [2]:
# Graph implementation
def generate_graph(cluster_sizes=[100,100], pin=0.5, pout=0.01, seed=0):
    """Generate a random connected graph"""
    probs = np.array([[pin, pout],[pout, pin]])
    while True:
        g = nx.stochastic_block_model(cluster_sizes, probs)
        if nx.algorithms.components.is_connected(g):
            return g


cluster_sizes = [10, 10]
pin = 0.5
#pout = 0.01
pout = 0.01
seed = 0
alpha = 1e-2
lamda = 1e-3
eta = 1e-2
d0 = 9
no_users = sum(cluster_sizes)
batch_size = 50
epochs = 1
it = 2000
G = generate_graph(cluster_sizes, pin, pout, seed)

#nx.draw(G, with_labels=True, node_size=100, alpha=1, linewidths=10)
#plt.show()

In [3]:
# Metropolis weights 
number_nodes = G.number_of_nodes()
weights = np.zeros([number_nodes, number_nodes])
for edge in G.edges():
  i, j = edge[0], edge[1]
  weights[i - 1][j - 1] = 1 / (1 + np.max([G.degree(i), G.degree(j)]))
  weights[j - 1][i - 1] = weights[i - 1][j - 1]

print(weights)

weights = weights + np.diag(1 - np.sum(weights, axis=0))

metropolis_weights = weights
print(metropolis_weights)


[[0.         0.         0.         0.125      0.         0.14285714
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.        ]
 [0.         0.         0.         0.         0.         0.14285714
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.16666667]
 [0.         0.         0.         0.125      0.14285714 0.14285714
  0.14285714 0.14285714 0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.14285714]
 [0.125      0.         0.125      0.         0.125      0.125
  0.125      0.125      0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.125     ]
 [0.         0.         0.14285714 0.125      0.         0.14285714
  0.14285714 0.         0.16666667 0.         0.         0.
  0.         0.         0

In [4]:
def load_dataset():
    transforms_mnist = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,),(0.3081,))])
    mnist_data_train = datasets.MNIST('./data/mnist', train=True, download=True, transform=transforms_mnist)
    mnist_data_test = datasets.MNIST('./data/mnist', train=False, download=True, transform=transforms_mnist)

    return mnist_data_train, mnist_data_test

In [5]:
def degrees(A):
    """Return the degrees of each node of a graph from its adjacency matrix"""
    return np.sum(A, axis=0).reshape(A.shape[0], 1)

def node_degree(n, G):
    cnt = 0
    for i in G.neighbors(n):
        cnt += 1
    return cnt

def get_neighbors(n, G):
    neighbors_list = []
    for i in G.neighbors(n):
        neighbors_list.append(int(i))
    return neighbors_list

In [6]:
# Dataset partitioning
def random_split(X, y, n, seed):
    """Equally split data between n agents"""
    rng = np.random.default_rng(seed)
    perm = rng.permutation(y.size)
    X_split = np.array_split(X[perm], n)  #np.stack to keep as a np array
    y_split = np.array_split(y[perm], n)
    return X_split, y_split





X_train = np.load('X_train.npy')
X_test = np.load('X_test.npy')
y_train = np.load('y_train.npy')
y_test = np.load('y_test.npy')


X, y = random_split(X_train, y_train, no_users, 1234)

In [7]:
X_train.shape

(14087, 9)

In [8]:
datapoints = {}
count = 0
W1 = np.array([2.0, 2.0, 3.0, 3.0])
W2 = np.array([-2.0, 2.0, 3.0, -3.0])
W3 = 2 * W1
W4 = 2  * W2
W = [W1, W2]
m = 200
n = 4

scaler = [1.0, -1.0]

noise_sd = 0.001
for i, cluster_size in enumerate(cluster_sizes):
    for j in range(cluster_size):
        features = np.random.normal(loc=0.0, scale=1.0, size=(m, n))
        label = np.dot(features, W[i ]) + np.random.normal(0,noise_sd)
        data = X[count]
        data[:, 0:4] *= scaler[i]
        datapoints[count] = {
                'features': data,
                'degree': node_degree(count, G),
                'label': y[count],
                'neighbors': get_neighbors(count, G),
                'exact_weights': torch.from_numpy(W[i])
            }
        count += 1

In [9]:
class MyDataset(Dataset):
    def __init__(self, data, targets, transform=None):
        self.data = torch.FloatTensor(data)
        self.targets = torch.FloatTensor(targets).unsqueeze(-1)
        
    def __getitem__(self, index):
        x = self.data[index]
        y = self.targets[index]

        return x, y
    
    def __len__(self):
        return len(self.data)


In [10]:
class MLP_Net(nn.Module):
    def __init__(self, user_id):
        super(MLP_Net, self).__init__()
        self.fc1 = nn.Linear(9, 4, bias=False)
        self.fc2 = nn.Linear(4, 1, bias=False)
        #self.fc3 = nn.Linear(200, 10)
        self.user_id = user_id

    def forward(self, x):
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        output = self.fc2(x)
        #output = self.fc3(x)
        return output

In [11]:
from typing import Iterable, Optional

def grads_to_vector(parameters: Iterable[torch.Tensor]) -> torch.Tensor:
    r"""Convert parameters to one vector

    Args:
        parameters (Iterable[Tensor]): an iterator of Tensors that are the
            parameters of a model.

    Returns:
        The parameters represented by a single vector
    """
    # Flag for the device where the parameter is located
    param_device = None

    vec = []
    for param in parameters:
        # Ensure the parameters are located in the same device
        param_device = param.grad

        vec.append(param_device.view(-1))
    return torch.cat(vec)

In [12]:
model = MLP_Net(user_id=0)

lr = 0.01

dataloader = DataLoader(MyDataset(datapoints[19]["features"], datapoints[19]["label"]), batch_size=100, shuffle=False)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
for i in range(100):
    for (x, y) in dataloader:
        criterion = nn.MSELoss()
        optimizer.zero_grad()
        yhat = model(x)
        print(y.size())
        print(yhat.size())
        loss = criterion(yhat, y)
        
        loss.backward()
        print(i, loss, grads_to_vector(model.parameters()), parameters_to_vector(model.parameters()))
        #optimizer.step()
        new_model = parameters_to_vector(model.parameters()) - lr * grads_to_vector(model.parameters())
        vector_to_parameters(parameters=model.parameters(), vec=new_model)
        #if i % 50 ==0:
            #lr *= 0.9
            

#parameters_to_vector(model.parameters())

torch.Size([100, 1])
torch.Size([100, 1])
0 tensor(54.3907, grad_fn=<MseLossBackward0>) tensor([ 2.4272, -2.0785, -1.0354,  0.2732, -0.4750, -0.6053, -0.4538,  0.2985,
         1.4427, -0.3811,  0.2589, -0.5102,  1.0318, -0.9978, -0.8853, -1.0514,
        -0.2445,  0.1041, -0.4611,  0.6796, -0.2208, -0.9056,  0.7682,  0.4794,
         0.8367,  1.2635, -0.9853,  1.9676, -1.3036, -1.8072,  0.4253, -1.3689,
        -1.5129, -1.2250,  2.7457,  2.0146, -3.2404, -3.3948, -3.3887, -4.2334]) tensor([ 0.1985, -0.2981, -0.1796, -0.1065, -0.0989,  0.2372, -0.3311, -0.1812,
        -0.0091, -0.0843, -0.2636,  0.1832, -0.0435,  0.2756,  0.2815,  0.1610,
         0.0514, -0.1910, -0.0527,  0.1134, -0.1045, -0.0957,  0.2497, -0.1215,
         0.3243,  0.1376, -0.2712, -0.0478, -0.3024, -0.1488, -0.1151, -0.2414,
         0.0184, -0.1014,  0.2378,  0.1883, -0.3841,  0.2652, -0.3120, -0.4675],
       grad_fn=<CatBackward0>)
torch.Size([100, 1])
torch.Size([100, 1])
0 tensor(51.7729, grad_fn=<MseLossBac

torch.Size([100, 1])
torch.Size([100, 1])
4 tensor(40.1780, grad_fn=<MseLossBackward0>) tensor([-1.4082e-02,  2.3752e-03,  4.4207e-02, -7.2746e-02,  6.9480e-02,
         5.4749e-02,  7.1050e-02,  1.1311e-02, -7.1166e-03,  6.1126e-01,
        -2.5700e+00,  3.1392e+00, -4.3001e+00,  3.7666e+00,  3.9933e+00,
         3.9523e+00, -4.5018e+00,  7.4920e-01,  8.1520e-01, -1.2315e+00,
         9.5022e-01, -1.4725e+00,  1.4339e+00,  1.3457e+00,  1.4000e+00,
        -1.4240e+00,  1.4265e+00, -2.9005e-02,  1.3664e-01, -2.9442e-01,
         6.0074e-01, -6.0198e-01, -4.1437e-01, -6.4277e-01,  1.2436e-01,
         6.4112e-02, -2.3616e-01,  4.9733e+00, -2.5318e+00,  4.2292e-01]) tensor([ 0.0241, -0.1469, -0.0726, -0.1507, -0.0492,  0.2585, -0.2854, -0.1615,
        -0.0760,  0.1613, -0.2541,  0.1134, -0.7382,  0.8048,  0.7243,  0.7524,
         0.9633, -0.1949, -0.1352,  0.2126, -0.1820,  0.0024,  0.1533, -0.2218,
         0.2300,  0.1839, -0.3620, -0.1919, -0.1946, -0.0114, -0.1350, -0.1113,
       

8 tensor(15.3835, grad_fn=<MseLossBackward0>) tensor([-5.0743e-01,  4.8402e-01,  4.3793e-02, -3.9744e-01,  3.1748e-01,
         2.8579e-01,  3.4084e-01,  1.4540e-01, -5.2081e-02, -8.7417e-01,
        -1.5622e-01,  3.1678e-01, -7.4623e-01, -6.4213e-01,  9.6709e-01,
        -5.2387e-01, -2.0264e-01,  5.6988e-02,  1.6704e+00, -2.1128e+00,
         3.1381e-01, -5.5012e-01, -5.2287e-01, -8.7057e-03, -3.4717e-01,
         3.2350e-01,  6.6443e-01, -7.7435e-02,  5.5116e-02,  1.0860e-01,
         1.1300e-01,  7.8292e-04,  2.6731e-02, -1.5846e-02, -1.3508e-01,
        -2.2348e-02, -9.0272e-01, -3.7119e-01, -2.4165e+00,  5.9251e-02]) tensor([ 9.2720e-02, -2.0989e-01, -1.2520e-01, -9.5987e-02, -1.0191e-01,
         2.2755e-01, -3.3719e-01, -1.8946e-01, -5.8157e-02,  5.1002e-01,
        -2.2646e-01, -5.7307e-01, -5.5648e-01,  5.6892e-01,  4.1022e-01,
         5.5780e-01,  1.8173e+00,  9.0777e-02, -7.8823e-01,  9.6813e-01,
        -4.7348e-01,  3.9357e-01, -1.7070e-01, -6.0789e-01, -8.1995e-02,
    

         3.2812e-02, -1.2265e+00, -2.9006e-01, -1.0738e+00,  1.0753e-01]) tensor([ 0.3833, -0.4952, -0.0833,  0.0210, -0.2126,  0.1188, -0.4518, -0.2451,
         0.0219,  0.9180, -0.3705, -0.7941, -0.2326,  0.4247,  0.0957,  0.4613,
         1.8267,  0.2738, -1.2286,  1.5762, -0.4053,  0.3193,  0.1651, -0.5123,
         0.2594,  0.3055, -0.3299, -0.1821, -0.1895, -0.0173, -0.1686, -0.0973,
         0.0821,  0.0178,  0.0445,  0.0410,  0.6997,  2.2380,  2.1255, -0.1937],
       grad_fn=<CatBackward0>)
torch.Size([100, 1])
torch.Size([100, 1])
12 tensor(9.2133, grad_fn=<MseLossBackward0>) tensor([-7.3631e-01,  8.5390e-01, -6.6186e-01, -2.9760e-01,  3.5143e-01,
         6.4365e-01,  3.6727e-01, -1.1974e-01, -2.7718e-01, -1.2841e+00,
         4.9511e-01, -3.2175e-01, -1.2123e+00,  1.4086e+00,  2.2373e+00,
         1.4657e+00, -1.1855e+00, -8.2640e-01, -2.8291e-01, -2.1491e-01,
        -1.5256e+00,  1.5747e-01, -4.2101e-01, -2.7476e-01, -2.7238e-01,
        -1.9849e+00, -1.9351e-01, -1.7273

17 tensor(6.2656, grad_fn=<MseLossBackward0>) tensor([-0.3355,  0.0814, -0.2115, -0.0968, -0.1803,  0.0086, -0.1713,  0.5480,
        -0.1740,  0.2440, -1.4408,  0.7908,  0.0168, -0.5397, -0.2965, -0.5530,
         0.5205, -0.6584, -0.9746, -0.0954,  1.2355, -0.0150, -0.6579, -0.6724,
        -1.0773, -0.9638,  0.2936,  0.0097,  0.0965, -0.1550,  0.0707, -0.0406,
         0.0084, -0.0167,  0.0046,  0.0232, -0.3447,  0.3126, -0.1614,  0.0916]) tensor([ 5.7464e-01, -6.7803e-01,  1.1678e-01,  9.8237e-02, -2.6787e-01,
         2.9363e-02, -5.0679e-01, -3.6839e-01,  1.8122e-02,  1.1527e+00,
        -3.7075e-01, -5.4237e-01, -4.8036e-02,  3.8377e-01, -1.3784e-01,
         4.3532e-01,  1.7329e+00,  3.0633e-01, -1.1725e+00,  1.7715e+00,
        -3.8248e-01,  3.1936e-01,  3.2349e-01, -4.7395e-01,  4.6397e-01,
         4.2263e-01, -2.1106e-01, -1.8962e-01, -1.8604e-01, -3.5326e-02,
        -1.6383e-01, -1.1034e-01,  7.0647e-02,  1.4480e-03,  7.1290e-02,
         5.8803e-02,  1.0206e+00,  2.1722e

21 tensor(6.1832, grad_fn=<MseLossBackward0>) tensor([-0.3790,  0.2716, -1.5295,  0.1386,  0.3354,  0.2913,  0.0857,  0.0167,
         0.0753, -1.1040,  0.6162, -1.6675,  0.9092, -0.0198, -0.1747, -0.5314,
        -0.6960,  0.2775,  0.1804, -0.4643, -2.1292, -0.5009,  0.3948,  0.7903,
         0.3135, -0.9882,  1.2849,  0.1153, -0.0975,  0.1451, -0.1834,  0.0762,
         0.1018,  0.1329,  0.0126, -0.0292, -0.8149, -0.9241, -0.5354, -0.0698]) tensor([ 6.3736e-01, -6.9700e-01,  2.3064e-01,  1.1131e-01, -2.6268e-01,
         2.7667e-03, -4.9070e-01, -4.5440e-01, -1.2810e-03,  1.2355e+00,
        -2.2710e-01, -4.7654e-01,  1.1038e-02,  4.1217e-01, -2.4413e-01,
         4.9463e-01,  1.6768e+00,  3.3603e-01, -1.0275e+00,  1.8466e+00,
        -4.0854e-01,  3.9763e-01,  3.4509e-01, -4.8818e-01,  5.3023e-01,
         5.2854e-01, -2.0193e-01, -1.9973e-01, -1.8703e-01, -3.8178e-02,
        -1.6306e-01, -1.1569e-01,  6.7979e-02, -5.8956e-03,  8.4777e-02,
         6.9266e-02,  1.1107e+00,  2.1656e

26 tensor(5.2635, grad_fn=<MseLossBackward0>) tensor([-0.1067, -0.2457,  0.0077, -0.2117, -0.0382,  0.0326, -0.0657,  0.3546,
        -0.1043,  0.4333, -1.4024,  0.7926,  0.0835, -0.4347, -0.3420, -0.5054,
         0.4075, -0.1765, -0.7174, -0.0895,  0.9384,  0.2030, -0.7454, -0.6243,
        -0.9879, -0.5979,  0.6367, -0.0641,  0.1595, -0.1181,  0.0185, -0.0066,
         0.0620,  0.0157, -0.0088, -0.0498, -0.0526,  0.2795, -0.3468,  0.0650]) tensor([ 0.6771, -0.6745,  0.3295,  0.1135, -0.2673, -0.0192, -0.4757, -0.5015,
        -0.0386,  1.3344, -0.1064, -0.4414,  0.0396,  0.4380, -0.2989,  0.5670,
         1.6326,  0.3235, -0.9066,  1.9283, -0.4153,  0.4713,  0.3608, -0.4858,
         0.5860,  0.5564, -0.1908, -0.2076, -0.1912, -0.0353, -0.1645, -0.1162,
         0.0647, -0.0093,  0.1029,  0.0718,  1.1582,  2.1974,  2.3497, -0.2452],
       grad_fn=<CatBackward0>)
torch.Size([100, 1])
torch.Size([100, 1])
26 tensor(6.8325, grad_fn=<MseLossBackward0>) tensor([-0.1934, -0.0389, -0.4380

31 tensor(6.1923, grad_fn=<MseLossBackward0>) tensor([ 0.1409, -0.3264, -0.5864,  0.3529, -0.0839, -0.4288, -0.3744, -0.4021,
         0.2677,  0.1790, -0.7869, -0.8367,  0.5293, -0.4332, -0.5137, -0.8517,
        -0.5605,  0.6608, -0.4656,  0.1713, -0.2691,  1.0098, -0.6362, -0.9694,
        -0.9923, -0.4764,  2.2028,  0.1170, -0.1367,  0.0593, -0.1213,  0.1023,
         0.0962,  0.1322, -0.0592, -0.1674,  0.4369, -0.2501,  0.1485,  0.0236]) tensor([ 0.7115, -0.6377,  0.4033,  0.1133, -0.2828, -0.0391, -0.4699, -0.5290,
        -0.0712,  1.4289,  0.0125, -0.4424,  0.0743,  0.4365, -0.3430,  0.6144,
         1.5850,  0.3183, -0.7841,  2.0021, -0.4109,  0.5486,  0.3638, -0.4904,
         0.6233,  0.5794, -0.1670, -0.2133, -0.1992, -0.0343, -0.1663, -0.1136,
         0.0620, -0.0089,  0.1173,  0.0765,  1.1945,  2.2344,  2.3846, -0.2616],
       grad_fn=<CatBackward0>)
torch.Size([100, 1])
torch.Size([100, 1])
31 tensor(4.9409, grad_fn=<MseLossBackward0>) tensor([ 0.0545, -0.3943, -0.1191

         2.2889e-03,  2.3275e-01,  1.0721e+00, -2.7823e+00,  1.1093e-01]) tensor([ 0.7419, -0.5993,  0.4839,  0.0930, -0.2961, -0.0363, -0.4550, -0.5302,
        -0.0934,  1.5033,  0.1240, -0.4248,  0.0898,  0.4325, -0.3701,  0.6663,
         1.5588,  0.3011, -0.6765,  2.0568, -0.3782,  0.5876,  0.3699, -0.4846,
         0.6604,  0.6254, -0.2143, -0.2182, -0.2055, -0.0374, -0.1640, -0.1107,
         0.0596, -0.0088,  0.1283,  0.0839,  1.2188,  2.2773,  2.4154, -0.2736],
       grad_fn=<CatBackward0>)
torch.Size([100, 1])
torch.Size([100, 1])
36 tensor(5.5151, grad_fn=<MseLossBackward0>) tensor([-0.2856, -0.0190,  0.2843,  0.1767, -0.1271,  0.0302, -0.0933, -0.1574,
         0.0806, -0.8189,  0.0983, -0.2213,  0.0803, -0.0457,  0.2845, -0.1173,
         0.1529, -0.3738, -2.3424,  2.1554, -0.0134,  1.4424, -1.2420, -1.5790,
        -1.3514,  0.5781,  1.6670,  0.1526, -0.1465,  0.0861, -0.0824,  0.0941,
         0.0419,  0.0780, -0.0418, -0.1214,  0.0903, -0.5217,  2.7220,  0.0587]) tenso

torch.Size([100, 1])
torch.Size([100, 1])
41 tensor(5.4346, grad_fn=<MseLossBackward0>) tensor([-2.2781e-01, -5.0774e-02,  2.2250e-01,  1.1933e-01, -3.0872e-02,
         1.0839e-01, -2.2972e-04, -2.8489e-01,  8.8012e-02, -8.1640e-01,
         1.4841e-01, -1.9203e-01,  5.3045e-02,  1.4092e-02,  3.1007e-01,
        -5.2066e-02,  1.2563e-01, -3.9984e-01, -2.4650e+00,  2.1704e+00,
        -1.7162e-01,  1.6438e+00, -1.2597e+00, -1.6786e+00, -1.3669e+00,
         4.9255e-01,  1.8623e+00,  1.5713e-01, -1.5624e-01,  1.0372e-01,
        -7.1606e-02,  7.5992e-02,  2.4988e-02,  5.8987e-02, -2.2664e-02,
        -1.3486e-01,  1.0470e-01, -5.3444e-01,  2.7590e+00,  5.1933e-02]) tensor([ 0.7683, -0.5504,  0.5274,  0.0860, -0.3253, -0.0317, -0.4571, -0.5367,
        -0.1166,  1.5759,  0.2383, -0.4454,  0.1282,  0.4046, -0.3910,  0.6951,
         1.5100,  0.2823, -0.5993,  2.1531, -0.3853,  0.6473,  0.3452, -0.4996,
         0.6646,  0.6150, -0.1394, -0.2219, -0.2140, -0.0358, -0.1647, -0.1045,
       

45 tensor(2.4919, grad_fn=<MseLossBackward0>) tensor([ 0.0996, -0.4086,  1.0745, -0.8891,  0.7246, -0.2216,  0.7298,  0.4327,
        -0.0398,  0.1857, -0.7617,  2.0028, -1.6572,  1.3505, -0.4130,  1.3603,
         0.8064, -0.0741,  2.4321, -3.2933,  2.2000, -2.5211,  2.3023,  2.4522,
         2.0836,  2.1087, -7.0187, -0.3131,  0.4875, -0.5119,  0.5071, -0.4428,
        -0.2408, -0.4180, -0.3519,  0.8459,  0.0608,  0.8020, -2.7453,  0.0867]) tensor([ 0.7973, -0.5180,  0.5853,  0.0571, -0.3425, -0.0256, -0.4472, -0.5287,
        -0.1362,  1.6465,  0.3307, -0.4344,  0.1239,  0.4055, -0.4193,  0.7500,
         1.4786,  0.2524, -0.4792,  2.1838, -0.3474,  0.6408,  0.3600, -0.4630,
         0.6986,  0.6405, -0.2079, -0.2306, -0.2123, -0.0450, -0.1564, -0.1042,
         0.0524, -0.0051,  0.1536,  0.0950,  1.2666,  2.3608,  2.4689, -0.2943],
       grad_fn=<CatBackward0>)
torch.Size([100, 1])
torch.Size([100, 1])
46 tensor(5.3488, grad_fn=<MseLossBackward0>) tensor([-0.2389, -0.0141,  0.2936

50 tensor(5.7013, grad_fn=<MseLossBackward0>) tensor([ 1.6667e-01, -2.4499e-01, -3.2566e-01,  3.7118e-01,  2.9477e-04,
        -3.5742e-01, -3.1646e-01, -3.6326e-01,  1.7325e-01,  1.8603e-01,
        -5.3909e-01, -7.8946e-01,  5.4887e-01, -1.3376e-01, -2.4697e-01,
        -5.6433e-01, -5.6783e-01,  4.8597e-01, -7.4379e-01,  4.3079e-01,
        -1.7067e-01,  1.0210e+00, -5.3987e-01, -1.0583e+00, -8.6537e-01,
        -9.5902e-02,  2.2531e+00,  1.4753e-01, -1.9349e-01,  7.4212e-02,
        -1.6199e-01,  1.2160e-01,  9.6874e-02,  1.4694e-01, -4.8317e-02,
        -2.0345e-01,  2.8971e-01, -2.2424e-01,  5.1580e-01,  1.6876e-02]) tensor([ 8.2705e-01, -4.9838e-01,  6.2370e-01,  3.3678e-02, -3.6190e-01,
        -8.3204e-03, -4.4765e-01, -5.1866e-01, -1.5367e-01,  1.7006e+00,
         4.2343e-01, -4.6907e-01,  1.6162e-01,  4.0683e-01, -4.2723e-01,
         7.7250e-01,  1.4374e+00,  2.3759e-01, -4.0411e-01,  2.2447e+00,
        -3.5299e-01,  6.5718e-01,  3.4413e-01, -4.5310e-01,  6.9682e-01,
    

torch.Size([100, 1])
torch.Size([100, 1])
51 tensor(4.2034, grad_fn=<MseLossBackward0>) tensor([ 0.1247, -0.4209,  0.0241, -0.3677,  0.2714,  0.2328,  0.2223,  0.2292,
        -0.0358,  0.7304, -1.4784,  0.9655,  0.3460, -0.9750, -0.6330, -1.0434,
         0.8339,  0.4676, -0.0817, -0.0470,  0.6714,  0.9466, -1.1100, -0.7083,
        -1.0162, -0.1572,  1.2290, -0.1461,  0.1818, -0.0703, -0.1131,  0.1017,
         0.1093,  0.0918, -0.0308, -0.1044,  0.0068,  0.2249, -0.3213,  0.0119]) tensor([ 8.3062e-01, -4.9133e-01,  6.4161e-01,  2.1669e-02, -3.6574e-01,
         2.2492e-04, -4.4311e-01, -5.1088e-01, -1.6018e-01,  1.7092e+00,
         4.5134e-01, -4.6776e-01,  1.6199e-01,  4.0888e-01, -4.2663e-01,
         7.8453e-01,  1.4353e+00,  2.3003e-01, -3.8004e-01,  2.2516e+00,
        -3.4768e-01,  6.4777e-01,  3.4657e-01, -4.4034e-01,  7.0547e-01,
         6.0933e-01, -1.7721e-01, -2.3557e-01, -2.1443e-01, -5.1649e-02,
        -1.5045e-01, -1.0030e-01,  4.6916e-02, -2.1622e-03,  1.7383e-01,


53 tensor(5.6269, grad_fn=<MseLossBackward0>) tensor([ 0.1608, -0.2223, -0.2676,  0.3470,  0.0372, -0.3210, -0.2829, -0.3490,
         0.1608,  0.3603, -0.9280, -0.4088,  0.2090,  0.1978, -0.0286, -0.2404,
        -0.6540,  0.4401, -0.7700,  0.4744, -0.1546,  1.0284, -0.5443, -1.0602,
        -0.8584, -0.0542,  2.2354,  0.1294, -0.1831,  0.0464, -0.1434,  0.1111,
         0.0595,  0.1261, -0.0636, -0.1656,  0.2487, -0.2115,  0.5612,  0.0276]) tensor([ 0.8415, -0.4835,  0.6664,  0.0098, -0.3748,  0.0054, -0.4450, -0.5067,
        -0.1668,  1.7330,  0.4888, -0.4861,  0.1799,  0.4064, -0.4320,  0.7891,
         1.4154,  0.2302, -0.3557,  2.2780, -0.3426,  0.6588,  0.3348, -0.4465,
         0.6960,  0.6044, -0.1545, -0.2368, -0.2143, -0.0572, -0.1451, -0.1008,
         0.0426, -0.0030,  0.1783,  0.0890,  1.3196,  2.4319,  2.4957, -0.3031],
       grad_fn=<CatBackward0>)
torch.Size([100, 1])
torch.Size([100, 1])
53 tensor(4.1473, grad_fn=<MseLossBackward0>) tensor([ 0.2242, -0.5085,  0.1635

54 tensor(4.9873, grad_fn=<MseLossBackward0>) tensor([-0.1351, -0.0773, -1.3092,  0.7472, -0.1035, -0.2199, -0.4256, -0.1651,
        -0.1481, -0.7498,  0.3828, -0.5562,  0.0455,  0.0230, -0.0820, -0.0624,
         0.0608, -0.0960,  0.4820, -0.4308, -1.7135,  0.2298, -0.0362,  0.1856,
        -0.0550, -0.6443,  1.1575,  0.0297, -0.0833,  0.1623, -0.2753,  0.0865,
         0.1645,  0.1641, -0.0047, -0.0857, -0.4760, -0.3071, -0.4708, -0.0954]) tensor([ 8.4494e-01, -4.7287e-01,  6.7990e-01,  1.3962e-03, -3.8415e-01,
         1.0098e-02, -4.4475e-01, -5.0259e-01, -1.7294e-01,  1.7356e+00,
         5.3604e-01, -4.9538e-01,  1.9222e-01,  3.9748e-01, -4.4737e-01,
         7.9145e-01,  1.4139e+00,  2.2690e-01, -3.2433e-01,  2.2830e+00,
        -3.3418e-01,  6.5011e-01,  3.3619e-01, -4.4002e-01,  7.0164e-01,
         6.1992e-01, -1.9497e-01, -2.3585e-01, -2.1571e-01, -6.0463e-02,
        -1.4100e-01, -1.0118e-01,  4.2328e-02, -3.4943e-03,  1.8239e-01,
         9.0031e-02,  1.3262e+00,  2.4464e

57 tensor(4.8963, grad_fn=<MseLossBackward0>) tensor([-0.1187, -0.0983, -1.2384,  0.7670, -0.1223, -0.2287, -0.4448, -0.1671,
        -0.1614, -0.8783,  0.5537, -0.5769, -0.0166,  0.0931, -0.0334,  0.0137,
         0.1062,  0.1123,  0.4992, -0.4313, -1.6702,  0.2813, -0.0808,  0.1383,
        -0.0962, -0.6218,  1.1555,  0.0345, -0.0939,  0.1510, -0.2520,  0.0622,
         0.1337,  0.1342, -0.0049, -0.0749, -0.4520, -0.2820, -0.4691, -0.0893]) tensor([ 0.8508, -0.4552,  0.7190, -0.0217, -0.3998,  0.0269, -0.4448, -0.4959,
        -0.1897,  1.7668,  0.5943, -0.5072,  0.2077,  0.3922, -0.4498,  0.8033,
         1.3950,  0.2198, -0.2816,  2.3133, -0.3253,  0.6467,  0.3269, -0.4335,
         0.6986,  0.6132, -0.1963, -0.2351, -0.2171, -0.0660, -0.1337, -0.1010,
         0.0393, -0.0038,  0.1948,  0.0825,  1.3481,  2.4756,  2.5042, -0.3063],
       grad_fn=<CatBackward0>)
torch.Size([4, 1])
torch.Size([4, 1])
57 tensor(2.2912, grad_fn=<MseLossBackward0>) tensor([-0.2027, -0.0481,  0.7563, -0

torch.Size([100, 1])
torch.Size([100, 1])
60 tensor(4.1150, grad_fn=<MseLossBackward0>) tensor([-2.4259e-01,  3.1664e-01,  1.0812e-01,  3.4808e-01, -1.9255e-01,
        -1.2667e-01, -2.5621e-01,  3.0285e-01, -6.8368e-02,  4.6870e-01,
        -3.5228e-01, -3.0212e-01, -5.7326e-01,  8.4405e-01,  1.0087e+00,
         7.7650e-01,  7.1421e-02,  2.0087e-01, -3.6962e-01,  1.3170e-02,
        -2.0643e-02, -7.2140e-01,  6.4449e-01,  1.0676e+00,  5.5148e-01,
         1.3033e-01,  2.4100e-01,  5.3694e-02, -3.9871e-02, -4.3777e-04,
         3.4660e-02, -3.9700e-02, -1.0631e-01, -4.5838e-02, -6.9585e-02,
         1.2316e-02, -1.6253e-01,  5.2303e-01, -5.8472e-02,  6.7146e-02]) tensor([ 0.8545, -0.4380,  0.7548, -0.0438, -0.4172,  0.0408, -0.4473, -0.4838,
        -0.2054,  1.8002,  0.6443, -0.5202,  0.2171,  0.3931, -0.4436,  0.8207,
         1.3797,  0.2119, -0.2458,  2.3422, -0.3170,  0.6336,  0.3238, -0.4167,
         0.6993,  0.6079, -0.1936, -0.2361, -0.2147, -0.0736, -0.1225, -0.1050,
       

torch.Size([4, 1])
torch.Size([4, 1])
62 tensor(2.2482, grad_fn=<MseLossBackward0>) tensor([-2.4434e-01,  8.2765e-04,  7.1457e-01, -6.5248e-01,  5.8085e-01,
        -5.1822e-01,  5.8962e-01,  3.0237e-01, -3.3543e-03, -4.3323e-01,
        -9.7204e-03,  1.3016e+00, -1.1731e+00,  1.0407e+00, -9.4995e-01,
         1.0556e+00,  5.4864e-01,  4.7332e-03,  2.3187e+00, -3.1801e+00,
         2.1829e+00, -2.6598e+00,  2.4618e+00,  2.5243e+00,  2.2537e+00,
         2.1227e+00, -7.1264e+00, -2.2899e-01,  3.8916e-01, -4.2745e-01,
         4.7409e-01, -4.3404e-01, -1.9540e-01, -4.1055e-01, -3.2845e-01,
         8.7767e-01, -2.0286e-01,  2.7988e-01, -2.5150e+00,  3.4103e-02]) tensor([ 0.8613, -0.4320,  0.7838, -0.0717, -0.4238,  0.0524, -0.4400, -0.4809,
        -0.2114,  1.8209,  0.6762, -0.5190,  0.2295,  0.3808, -0.4563,  0.8208,
         1.3658,  0.2027, -0.2227,  2.3648, -0.2957,  0.6323,  0.3125, -0.4241,
         0.6917,  0.6077, -0.2077, -0.2374, -0.2119, -0.0790, -0.1158, -0.1049,
         0.

66 tensor(4.4674, grad_fn=<MseLossBackward0>) tensor([ 0.1212,  0.1605, -0.4401,  0.1830, -0.1135,  0.3715,  0.0727, -0.0647,
         0.3993,  0.1092, -0.0288, -0.5981,  1.1827, -1.4895, -0.4073, -1.3170,
         0.0779,  0.7049, -0.0543, -0.2704,  0.0065, -0.1035, -0.3324, -0.9075,
        -0.0436, -0.0183,  0.2932,  0.0452, -0.0818,  0.1631, -0.0919,  0.0850,
         0.1586,  0.0576,  0.0128, -0.0439, -0.2473, -0.1479, -0.1981,  0.0016]) tensor([ 0.8752, -0.4279,  0.8053, -0.1005, -0.4379,  0.0748, -0.4401, -0.4717,
        -0.2248,  1.8535,  0.7276, -0.5527,  0.2521,  0.3623, -0.4539,  0.8187,
         1.3405,  0.2001, -0.1835,  2.4003, -0.3039,  0.6328,  0.2914, -0.4265,
         0.6772,  0.5720, -0.1475, -0.2410, -0.2068, -0.0827, -0.1098, -0.1037,
         0.0385, -0.0058,  0.2260,  0.0681,  1.4119,  2.5508,  2.5218, -0.3136],
       grad_fn=<CatBackward0>)
torch.Size([100, 1])
torch.Size([100, 1])
66 tensor(5.2986, grad_fn=<MseLossBackward0>) tensor([ 0.2974, -0.2400, -0.3824

69 tensor(4.4674, grad_fn=<MseLossBackward0>) tensor([ 0.0992,  0.1632, -0.3868,  0.1972, -0.1256,  0.3944,  0.0613, -0.0701,
         0.3851,  0.1894, -0.0657, -0.6164,  1.2266, -1.5085, -0.4261, -1.3312,
         0.0632,  0.7141, -0.0447, -0.2683,  0.0538, -0.1098, -0.3122, -0.8958,
        -0.0239, -0.0295,  0.2768,  0.0265, -0.0687,  0.1620, -0.0641,  0.0576,
         0.1351,  0.0321,  0.0161, -0.0594, -0.2277, -0.0865, -0.2038,  0.0064]) tensor([ 0.8812, -0.4227,  0.8291, -0.1385, -0.4449,  0.0925, -0.4327, -0.4657,
        -0.2473,  1.8796,  0.7735, -0.5624,  0.2629,  0.3493, -0.4616,  0.8209,
         1.3210,  0.1993, -0.1520,  2.4279, -0.2938,  0.6214,  0.2835, -0.4199,
         0.6718,  0.5631, -0.1505, -0.2434, -0.2023, -0.0899, -0.1000, -0.1061,
         0.0389, -0.0081,  0.2345,  0.0672,  1.4329,  2.5744,  2.5286, -0.3170],
       grad_fn=<CatBackward0>)
torch.Size([100, 1])
torch.Size([100, 1])
69 tensor(5.2222, grad_fn=<MseLossBackward0>) tensor([ 0.4774, -0.2789, -0.6795

72 tensor(5.5279, grad_fn=<MseLossBackward0>) tensor([-0.0900, -0.0721,  0.1099, -0.0619,  0.2182,  0.4203,  0.2132, -0.4468,
        -0.0317, -0.5154,  0.0516,  0.0196, -0.7783,  1.0168,  1.2996,  1.0083,
        -1.2157, -0.8848, -0.1936,  0.0045, -1.1170, -0.4384,  0.7112,  0.3525,
         0.8346, -1.9910,  0.3899, -0.0312,  0.0506,  0.1459,  0.0775, -0.1377,
        -0.1912, -0.1558,  0.2038, -0.0066,  0.0891, -0.8712, -0.1718, -0.1071]) tensor([ 0.8720, -0.4067,  0.8680, -0.1838, -0.4521,  0.1124, -0.4234, -0.4584,
        -0.2822,  1.8913,  0.8411, -0.5749,  0.2522,  0.3570, -0.4571,  0.8468,
         1.2952,  0.1957, -0.1136,  2.4489, -0.2911,  0.5877,  0.2975, -0.3899,
         0.6848,  0.5553, -0.1940, -0.2455, -0.1979, -0.0959, -0.0898, -0.1092,
         0.0417, -0.0104,  0.2433,  0.0690,  1.4546,  2.5988,  2.5314, -0.3220],
       grad_fn=<CatBackward0>)
torch.Size([100, 1])
torch.Size([100, 1])
72 tensor(3.9555, grad_fn=<MseLossBackward0>) tensor([ 2.6711e-02,  1.3389e-01,

75 tensor(4.4344, grad_fn=<MseLossBackward0>) tensor([ 0.2181, -0.3603, -1.0190,  0.8701, -0.1188, -0.2655, -0.4029, -0.3333,
         0.1124, -1.0047,  0.7270, -0.6738,  0.2943, -0.1117, -0.1803, -0.1664,
         0.2516,  0.1658,  0.5697, -0.4518, -1.5175,  0.4275, -0.2155,  0.0197,
        -0.2124, -0.5674,  1.2039,  0.0200, -0.0923,  0.1378, -0.2578,  0.0814,
         0.1212,  0.1481, -0.0050, -0.0541, -0.3228, -0.1958, -0.5047, -0.0429]) tensor([ 0.8711, -0.3997,  0.8973, -0.2285, -0.4609,  0.1275, -0.4187, -0.4421,
        -0.3204,  1.9097,  0.8968, -0.5914,  0.2792,  0.3226, -0.4918,  0.8282,
         1.2826,  0.2080, -0.0809,  2.4725, -0.2718,  0.5832,  0.2784, -0.3997,
         0.6659,  0.5683, -0.2065, -0.2423, -0.2001, -0.1021, -0.0862, -0.1053,
         0.0519, -0.0061,  0.2514,  0.0623,  1.4807,  2.6255,  2.5395, -0.3254],
       grad_fn=<CatBackward0>)
torch.Size([4, 1])
torch.Size([4, 1])
75 tensor(2.2503, grad_fn=<MseLossBackward0>) tensor([-0.4916,  0.2932,  0.4624, -0

79 tensor(3.7595, grad_fn=<MseLossBackward0>) tensor([ 0.1159, -0.1823,  0.0924,  0.0088, -0.0020, -0.1131, -0.0556,  0.2272,
         0.0763,  0.4580, -0.9363,  1.0845,  0.6317, -1.4195, -0.8401, -1.4973,
         1.2031,  0.0068,  0.1397,  0.0638,  0.4738,  1.1711, -1.2059, -0.5381,
        -0.9813, -0.0798,  1.6667, -0.1150,  0.0582,  0.0230, -0.2085,  0.1796,
         0.0984,  0.1400, -0.0709, -0.1469,  0.0917, -0.0936, -0.1700,  0.0281]) tensor([ 0.8646, -0.3935,  0.9389, -0.2789, -0.4746,  0.1541, -0.4181, -0.4188,
        -0.3732,  1.9438,  0.9420, -0.5971,  0.2761,  0.3147, -0.4888,  0.8387,
         1.2527,  0.2073, -0.0449,  2.5016, -0.2682,  0.5636,  0.2684, -0.3862,
         0.6587,  0.5391, -0.1875, -0.2461, -0.1942, -0.1071, -0.0806, -0.1024,
         0.0565, -0.0035,  0.2630,  0.0591,  1.5202,  2.6523,  2.5382, -0.3315],
       grad_fn=<CatBackward0>)
torch.Size([100, 1])
torch.Size([100, 1])
79 tensor(5.4648, grad_fn=<MseLossBackward0>) tensor([ 0.2170, -0.2867, -0.2651

82 tensor(4.2389, grad_fn=<MseLossBackward0>) tensor([ 1.2944e-01, -3.0112e-01, -7.5686e-01,  8.1931e-01, -1.3691e-01,
        -1.9656e-01, -3.9833e-01, -3.3742e-01, -1.3014e-03, -1.0003e+00,
         7.8420e-01, -7.1426e-01,  3.3761e-01, -1.2356e-01, -1.9832e-01,
        -1.8074e-01,  2.4312e-01,  1.9832e-01,  5.5630e-01, -4.4202e-01,
        -1.3656e+00,  4.5434e-01, -2.7244e-01, -2.2119e-02, -2.7014e-01,
        -5.4049e-01,  1.2230e+00,  2.8054e-02, -1.2960e-01,  1.2270e-01,
        -2.5260e-01,  8.1614e-02,  1.1060e-01,  1.4042e-01, -2.3040e-02,
        -6.5028e-02, -2.8405e-01, -1.3975e-01, -5.2177e-01, -2.8922e-02]) tensor([ 8.4408e-01, -3.8417e-01,  9.7380e-01, -3.2074e-01, -4.9268e-01,
         1.6363e-01, -4.2564e-01, -3.8628e-01, -4.1372e-01,  1.9531e+00,
         1.0014e+00, -6.2079e-01,  2.8955e-01,  2.9851e-01, -5.0910e-01,
         8.3970e-01,  1.2249e+00,  2.1757e-01, -1.4206e-02,  2.5234e+00,
        -2.5530e-01,  5.4466e-01,  2.5949e-01, -3.8916e-01,  6.4632e-01,
    

85 tensor(4.2012, grad_fn=<MseLossBackward0>) tensor([ 0.1342, -0.3165, -0.6876,  0.8222, -0.1561, -0.2044, -0.4145, -0.3273,
        -0.0290, -0.9658,  0.7802, -0.7453,  0.3746, -0.1447, -0.2228, -0.2020,
         0.2296,  0.2336,  0.5395, -0.4239, -1.3367,  0.4701, -0.2987, -0.0388,
        -0.2939, -0.5405,  1.2229, -0.0174, -0.0498,  0.0338, -0.1503,  0.0989,
         0.0897,  0.0967, -0.0114, -0.0588, -0.2411, -0.1075, -0.5078, -0.0238]) tensor([ 8.3376e-01, -3.8534e-01,  9.8853e-01, -3.5280e-01, -5.1250e-01,
         1.7269e-01, -4.3685e-01, -3.6376e-01, -4.4169e-01,  1.9709e+00,
         1.0385e+00, -6.2427e-01,  2.9171e-01,  2.8947e-01, -5.1376e-01,
         8.4451e-01,  1.2016e+00,  2.2032e-01,  1.1769e-02,  2.5441e+00,
        -2.5506e-01,  5.2835e-01,  2.5185e-01, -3.8495e-01,  6.3732e-01,
         5.5155e-01, -2.1829e-01, -2.5069e-01, -1.8041e-01, -1.2157e-01,
        -6.7145e-02, -1.0251e-01,  6.5730e-02, -1.1179e-03,  2.7892e-01,
         6.2305e-02,  1.5690e+00,  2.6928e

88 tensor(3.7350, grad_fn=<MseLossBackward0>) tensor([ 0.3008, -0.0601,  0.0756,  0.0088,  0.1366,  0.2008,  0.0846,  0.2969,
         0.3856,  1.0192, -0.6763,  0.0128, -0.7648,  1.0444,  1.1730,  0.9446,
         0.2085,  0.5044, -0.1083, -0.1026, -0.0912, -0.6707,  0.5879,  1.0810,
         0.5213, -0.0575,  0.3458,  0.0012, -0.0070, -0.0078,  0.0857, -0.0736,
        -0.1338, -0.0807, -0.0794, -0.0168, -0.0135,  0.7288, -0.2391,  0.0809]) tensor([ 8.1120e-01, -3.8011e-01,  1.0004e+00, -3.8316e-01, -5.3113e-01,
         1.9097e-01, -4.4962e-01, -3.4188e-01, -4.8444e-01,  2.0018e+00,
         1.0605e+00, -6.2206e-01,  2.8430e-01,  2.9058e-01, -5.0620e-01,
         8.5811e-01,  1.1824e+00,  2.2767e-01,  3.5392e-02,  2.5625e+00,
        -2.5919e-01,  5.0535e-01,  2.5121e-01, -3.7023e-01,  6.3394e-01,
         5.4725e-01, -2.1558e-01, -2.5322e-01, -1.7293e-01, -1.2886e-01,
        -5.9765e-02, -1.0594e-01,  6.8317e-02, -1.9905e-03,  2.8497e-01,
         6.3210e-02,  1.5891e+00,  2.7159e

torch.Size([100, 1])
torch.Size([100, 1])
91 tensor(4.1234, grad_fn=<MseLossBackward0>) tensor([ 0.0822, -0.2604, -0.5568,  0.7426, -0.1075, -0.1445, -0.3700, -0.2461,
        -0.0646, -0.9220,  0.7850, -0.7763,  0.3569, -0.1256, -0.1920, -0.1790,
         0.2273,  0.2376,  0.4784, -0.3632, -1.3040,  0.4531, -0.3184, -0.0064,
        -0.3006, -0.5362,  1.1840, -0.0116, -0.0594,  0.0267, -0.1469,  0.0976,
         0.0830,  0.0935, -0.0179, -0.0573, -0.2443, -0.0653, -0.4510, -0.0170]) tensor([ 7.8727e-01, -3.7695e-01,  1.0052e+00, -4.1712e-01, -5.4932e-01,
         2.0966e-01, -4.6018e-01, -3.2593e-01, -5.2874e-01,  2.0097e+00,
         1.0939e+00, -6.1884e-01,  2.9294e-01,  2.7136e-01, -5.2543e-01,
         8.5209e-01,  1.1602e+00,  2.2544e-01,  5.9531e-02,  2.5819e+00,
        -2.6382e-01,  4.9567e-01,  2.4108e-01, -3.7818e-01,  6.2126e-01,
         5.4499e-01, -2.1960e-01, -2.5163e-01, -1.7035e-01, -1.3216e-01,
        -5.8335e-02, -1.0432e-01,  7.6582e-02,  2.1441e-03,  2.9608e-01,


94 tensor(4.2584, grad_fn=<MseLossBackward0>) tensor([-0.3347,  0.6991,  0.3749,  0.3362, -0.1074,  0.2875,  0.1004, -0.0983,
        -0.2127,  0.3565, -0.4471, -0.0877,  0.6870, -1.0423,  0.2006, -0.8888,
         0.3493,  0.4434, -0.0849, -0.1207,  0.2721, -0.0785, -0.2775, -0.9786,
        -0.0063, -0.0294,  0.2323,  0.0208, -0.0659,  0.0744, -0.0371,  0.0397,
         0.1361,  0.0116, -0.0166, -0.0136, -0.0472, -0.0525, -0.0692,  0.0036]) tensor([ 0.7839, -0.3850,  0.9988, -0.4401, -0.5630,  0.2302, -0.4686, -0.3214,
        -0.5510,  2.0382,  1.0990, -0.6119,  0.2974,  0.2628, -0.5159,  0.8507,
         1.1362,  0.2260,  0.0691,  2.6068, -0.2761,  0.4923,  0.2288, -0.3879,
         0.6095,  0.5209, -0.1731, -0.2520, -0.1661, -0.1357, -0.0550, -0.1053,
         0.0807,  0.0031,  0.3037,  0.0535,  1.6274,  2.7304,  2.5653, -0.3592],
       grad_fn=<CatBackward0>)
torch.Size([100, 1])
torch.Size([100, 1])
94 tensor(4.4008, grad_fn=<MseLossBackward0>) tensor([ 0.4974, -0.3422, -0.5101

torch.Size([100, 1])
torch.Size([100, 1])
96 tensor(3.6779, grad_fn=<MseLossBackward0>) tensor([ 0.3392, -0.1942, -0.0269,  0.0458,  0.0528,  0.0990,  0.0104,  0.2958,
         0.2656,  1.0825, -0.7002, -0.0303, -0.7720,  1.0657,  1.1854,  0.9623,
         0.1519,  0.5457, -0.0325, -0.1608, -0.1485, -0.6697,  0.5777,  1.0833,
         0.5086, -0.1076,  0.3442, -0.0267,  0.0095,  0.0136,  0.0821, -0.0714,
        -0.1296, -0.0781, -0.0713, -0.0500,  0.0179,  0.7339, -0.3004,  0.0807]) tensor([ 0.7630, -0.3859,  0.9995, -0.4676, -0.5759,  0.2385, -0.4780, -0.3066,
        -0.5784,  2.0469,  1.1291, -0.6103,  0.2874,  0.2708, -0.5278,  0.8682,
         1.1315,  0.2361,  0.0941,  2.6085, -0.2736,  0.4641,  0.2422, -0.3654,
         0.6137,  0.5403, -0.2177, -0.2513, -0.1626, -0.1401, -0.0510, -0.1063,
         0.0847,  0.0047,  0.3093,  0.0512,  1.6402,  2.7534,  2.5623, -0.3630],
       grad_fn=<CatBackward0>)
torch.Size([100, 1])
torch.Size([100, 1])
96 tensor(4.0826, grad_fn=<MseLossBac

torch.Size([100, 1])
torch.Size([100, 1])
99 tensor(4.3083, grad_fn=<MseLossBackward0>) tensor([ 0.4469, -0.3238, -0.5470,  0.4420,  0.0901, -0.2234, -0.1898, -0.4004,
         0.2991, -0.0471, -0.2381, -0.7810,  0.4543,  0.4887,  0.2214,  0.1532,
        -0.9744, -0.1588, -0.9756,  0.8913,  0.0153,  0.8952, -0.5228, -0.8637,
        -0.6120,  0.2115,  1.8331,  0.1881, -0.1934, -0.0707, -0.0701,  0.0646,
        -0.0110,  0.0642, -0.0168, -0.1469, -0.2185, -0.2663,  0.8880,  0.0631]) tensor([ 0.7608, -0.4005,  0.9888, -0.4916, -0.5911,  0.2566, -0.4911, -0.3009,
        -0.5912,  2.0581,  1.1468, -0.6014,  0.2956,  0.2636, -0.5323,  0.8657,
         1.1030,  0.2272,  0.1039,  2.6343, -0.2865,  0.4686,  0.2295, -0.3768,
         0.5986,  0.5178, -0.1779, -0.2512, -0.1582, -0.1432, -0.0485, -0.1073,
         0.0884,  0.0058,  0.3183,  0.0498,  1.6579,  2.7533,  2.5722, -0.3697],
       grad_fn=<CatBackward0>)
torch.Size([100, 1])
torch.Size([100, 1])
99 tensor(3.5338, grad_fn=<MseLossBac

In [13]:
parameters_to_vector(model.parameters())

tensor([ 0.7507, -0.3908,  0.9968, -0.4981, -0.5987,  0.2649, -0.4940, -0.2958,
        -0.5991,  2.0674,  1.1485, -0.6043,  0.3032,  0.2484, -0.5323,  0.8558,
         1.1065,  0.2295,  0.0843,  2.6632, -0.2868,  0.4802,  0.2135, -0.4030,
         0.5820,  0.5223, -0.1598, -0.2478, -0.1611, -0.1418, -0.0495, -0.1052,
         0.0934,  0.0084,  0.3210,  0.0434,  1.6643,  2.7589,  2.5993, -0.3700],
       grad_fn=<CatBackward0>)

In [14]:
class CNN_Net(nn.Module):
    def __init__(self):
        super(CNN_Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5)
        self.pool = nn.MaxPool2d(2,2)
        self.dropout = nn.Dropout(p=0.2)
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.dropout(x)
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        output = self.fc2(x)
        return output

In [15]:
class ClientUpdate(object):
    def __init__(self, dataset, batchSize, alpha, lamda, epochs, projection_list, projected_weights):
        self.train_loader = DataLoader(MyDataset(dataset["features"], dataset["label"]), batch_size=batchSize, shuffle=True)
        #self.learning_rate = learning_rate
        self.epochs = epochs
        self.batchSize = batchSize

    def train(self, model):
        criterion = nn.MSELoss()
        optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, momentum=0.5)

        e_loss = []
        for epoch in range(1, self.epochs+1):
            train_loss = 0
            model.train()
            for i, (data, labels) in zip(range(1), self.train_loader):
                data, labels = data, labels
                optimizer.zero_grad() 
                output = model(data)  
                loss = criterion(output, labels)
                #loss += mu/2 * torch.norm(client_param.data - server_param.data)**2
                loss.backward()
                grads = grads_to_vector(model.parameters())
                #optimizer.step()
                train_loss += loss.item()*data.size(0)
                weights = parameters_to_vector(model.parameters())
                mat_vec_sum = torch.zeros_like(weights)
                for j in G.neighbors(model.user_id):
                    mat_vec_sum = torch.add(mat_vec_sum, torch.matmul(torch.transpose(projection_list[model.user_id][j], 0, 1), 
                                                         projected_weights[j][model.user_id] - projected_weights[model.user_id][j]))
                
                model_update = parameters_to_vector(model.parameters()) - alpha * (grads + lamda * mat_vec_sum)
                
            vector_to_parameters(parameters=model.parameters(), vec=model_update)
                

            train_loss = train_loss/self.batchSize#len(self.train_loader.dataset) 
            e_loss.append(train_loss)

        total_loss = e_loss#sum(e_loss)/len(e_loss)

        return model.state_dict(), total_loss

In [16]:
# Preparing projection matrices
models = [MLP_Net(user_id=i) for i in range(no_users)]
#temp = MLP_Net()
projection_list = []
projected_weights = []

def update_ProjWeight(projection_list, projected_weights, first_run=True):
    #projected_weights = []
    for i in range(no_users):
        neighbors_mat = []
        neighbors_weights = []
        for j in range(no_users):
            if j in G.neighbors(i):
                with torch.no_grad():
                    if first_run == True:
                        row, column = d0, parameters_to_vector(models[i].parameters()).size()[0]
                        mat = torch.zeros((row, column))
                        mat.fill_diagonal_(1.0 + 1.0 * float(np.random.randn(1)))
                        neighbors_mat.append(mat)
                        neighbors_weights.append(torch.matmul(mat, parameters_to_vector(models[i].parameters())))
                    else:
                        neighbors_weights.append(torch.matmul(projection_list[i][j], parameters_to_vector(models[i].parameters())))
            else:
                neighbors_mat.append(0)
                neighbors_weights.append(0)
        if first_run == True:
            projection_list.append(neighbors_mat)
        projected_weights.append(neighbors_weights)

update_ProjWeight(projection_list, projected_weights)



In [17]:
print(projection_list[0])

[0, 0, tensor([[1.5772, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000],
        [0.0000, 1.5772, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 1.5772, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0

In [18]:
def testing(model, dataset, bs, criterion): 
    test_loss = 0
    correct = 0
    test_loader = DataLoader(MyDataset(X_test, y_test), batch_size=bs)
    l = len(test_loader)
    model.eval()
    for data, labels in test_loader:
        data, labels = data, labels
        output = model(data)
        loss = criterion(output, labels)
        test_loss += loss.item()*data.size(0)
        #_, pred = torch.max(output, 1)
        #correct += pred.eq(labels.data.view_as(pred)).sum().item()
    
    test_loss /= len(test_loader.dataset)
    
    return test_loss

In [19]:
def rel_error(model):
    return (torch.norm(parameters_to_vector(model.parameters()) - datapoints[model.user_id]['exact_weights']) / torch.norm(datapoints[model.user_id]['exact_weights'])).detach()

In [20]:
torch.norm(datapoints[model.user_id]['exact_weights'])

tensor(5.0990, dtype=torch.float64)

In [21]:
model = MLP_Net(user_id=0)

from torch.nn.utils import parameters_to_vector, vector_to_parameters

with torch.no_grad():    
    params = parameters_to_vector(model.parameters())

    print(params)

params *= 2.

vector_to_parameters(parameters=model.parameters(), vec=params)

parameters_to_vector(model.parameters())





tensor([ 2.5791e-01, -1.1576e-01, -1.8839e-01, -3.0653e-01,  3.2938e-02,
         1.2947e-01, -2.4702e-01, -2.6247e-01,  1.0280e-01, -3.4274e-02,
        -2.9882e-01,  1.1148e-01, -7.8249e-04,  3.1218e-01, -1.3248e-01,
        -1.0801e-01,  7.4604e-03,  2.9790e-01, -8.8858e-02, -1.7427e-01,
        -9.1826e-02, -6.6660e-02,  2.4504e-01, -2.0514e-01,  3.7661e-04,
         2.3968e-01,  5.0011e-02,  1.2925e-01,  6.7731e-03, -2.1153e-01,
         2.5250e-01, -2.7403e-01, -1.0404e-01, -1.5580e-01,  7.3808e-02,
         1.3833e-01, -6.9302e-03, -3.3627e-01,  1.4531e-01, -4.7420e-01])


tensor([ 5.1583e-01, -2.3153e-01, -3.7678e-01, -6.1306e-01,  6.5875e-02,
         2.5895e-01, -4.9404e-01, -5.2493e-01,  2.0559e-01, -6.8548e-02,
        -5.9763e-01,  2.2296e-01, -1.5650e-03,  6.2436e-01, -2.6496e-01,
        -2.1603e-01,  1.4921e-02,  5.9580e-01, -1.7772e-01, -3.4855e-01,
        -1.8365e-01, -1.3332e-01,  4.9007e-01, -4.1028e-01,  7.5322e-04,
         4.7937e-01,  1.0002e-01,  2.5850e-01,  1.3546e-02, -4.2307e-01,
         5.0500e-01, -5.4807e-01, -2.0809e-01, -3.1160e-01,  1.4762e-01,
         2.7666e-01, -1.3860e-02, -6.7254e-01,  2.9063e-01, -9.4841e-01],
       grad_fn=<CatBackward0>)

In [22]:
#global_model = CNN_Net().cuda()
models = [MLP_Net(user_id=i) for i in range(no_users)]
dummy_models = [MLP_Net(user_id=i) for i in range(no_users)]

#model.load_state_dict(global_model.state_dict())

criterion = nn.MSELoss()


train_loss = []
test_loss = []
test_accuracy = []
total_rel_error = []

for curr_round in tqdm(range(1, it+1)):
    w, local_loss = [], []

    
    for i in range(no_users):
        dummy_models[i].load_state_dict(models[i].state_dict())
        local_update = ClientUpdate(dataset=datapoints[i], batchSize=batch_size, alpha=alpha, lamda=lamda, epochs=1, projection_list=projection_list, projected_weights=projected_weights)
        weights, loss = local_update.train(dummy_models[i])
        w.append(weights)
        local_loss.append(loss)
        models[i].load_state_dict(w[i])
    
    if curr_round == 60:
        alpha *= 0.8
    if curr_round == 200:
        alpha *= 0.8
        
    
        
    
    
    # Update prjection matrix
    
    #print(projection_list[0], projected_weights[0])
    
    for i in range(no_users):
        weights = parameters_to_vector(models[i].parameters())
        for j in G.neighbors(i):
            mat_vec_sum = torch.zeros(d0)
            for k in G.neighbors(i):
                mat_vec_sum = torch.add(mat_vec_sum, projected_weights[i][k] - projected_weights[k][i])
            temp_mat = torch.outer(mat_vec_sum, weights).clone()


            projection_list[i][j] = torch.add(projection_list[i][j], -1 * eta * lamda * temp_mat)
                                         
    projected_weights = []                                          
    update_ProjWeight(projection_list, projected_weights, first_run=False)
        
        
        
    
    




          
            

    local_test_acc = []
    local_test_loss = []
    user_rel_error = 0
    for k in range(no_users):
      
        g_loss = testing(models[i], datapoints[i], 50, criterion)
        local_test_loss.append(g_loss)
        #user_rel_error += rel_error(models[i])
    
    
        

    g_loss = sum(local_test_loss) / len(local_test_loss)
    #total_rel_error.append(user_rel_error / no_users)
    
    

    test_loss.append(g_loss)
    #test_accuracy.append(g_accuracy)
    print("Training_loss %2.5f"% (test_loss[-1]))

  0%|          | 1/2000 [00:01<52:02,  1.56s/it]

Training_loss 48.21755


  0%|          | 2/2000 [00:02<43:37,  1.31s/it]

Training_loss 47.92515


  0%|          | 3/2000 [00:03<42:02,  1.26s/it]

Training_loss 47.57179


  0%|          | 4/2000 [00:05<44:05,  1.33s/it]

Training_loss 47.27007


  0%|          | 5/2000 [00:06<44:15,  1.33s/it]

Training_loss 46.92846


  0%|          | 6/2000 [00:07<41:23,  1.25s/it]

Training_loss 46.47045


  0%|          | 7/2000 [00:08<39:41,  1.19s/it]

Training_loss 46.01447


  0%|          | 8/2000 [00:10<39:22,  1.19s/it]

Training_loss 45.72222


  0%|          | 9/2000 [00:11<38:52,  1.17s/it]

Training_loss 45.25854


  0%|          | 10/2000 [00:12<38:17,  1.15s/it]

Training_loss 44.75205


  1%|          | 11/2000 [00:13<38:53,  1.17s/it]

Training_loss 44.02657


  1%|          | 12/2000 [00:14<40:34,  1.22s/it]

Training_loss 43.38439


  1%|          | 13/2000 [00:16<46:40,  1.41s/it]

Training_loss 42.91027


  1%|          | 14/2000 [00:18<51:48,  1.57s/it]

Training_loss 42.43865


  1%|          | 15/2000 [00:20<52:12,  1.58s/it]

Training_loss 41.75613


  1%|          | 16/2000 [00:22<1:01:59,  1.87s/it]

Training_loss 41.16360


  1%|          | 17/2000 [00:24<57:12,  1.73s/it]  

Training_loss 40.33659


  1%|          | 18/2000 [00:25<49:51,  1.51s/it]

Training_loss 39.30180


  1%|          | 19/2000 [00:26<44:35,  1.35s/it]

Training_loss 38.44120


  1%|          | 20/2000 [00:27<41:34,  1.26s/it]

Training_loss 37.43665


  1%|          | 21/2000 [00:28<40:14,  1.22s/it]

Training_loss 36.56973


  1%|          | 22/2000 [00:29<39:39,  1.20s/it]

Training_loss 36.01286


  1%|          | 23/2000 [00:30<39:33,  1.20s/it]

Training_loss 35.30096


  1%|          | 24/2000 [00:31<38:13,  1.16s/it]

Training_loss 34.57916


  1%|▏         | 25/2000 [00:32<36:46,  1.12s/it]

Training_loss 33.94390


  1%|▏         | 26/2000 [00:33<36:07,  1.10s/it]

Training_loss 33.10670


  1%|▏         | 27/2000 [00:34<35:18,  1.07s/it]

Training_loss 32.57769


  1%|▏         | 28/2000 [00:35<35:53,  1.09s/it]

Training_loss 32.08562


  1%|▏         | 29/2000 [00:37<36:16,  1.10s/it]

Training_loss 31.49369


  2%|▏         | 30/2000 [00:38<38:26,  1.17s/it]

Training_loss 30.96830


  2%|▏         | 31/2000 [00:39<36:45,  1.12s/it]

Training_loss 30.60369


  2%|▏         | 32/2000 [00:40<35:19,  1.08s/it]

Training_loss 30.20894


  2%|▏         | 33/2000 [00:41<34:13,  1.04s/it]

Training_loss 29.92614


  2%|▏         | 34/2000 [00:42<34:18,  1.05s/it]

Training_loss 29.77050


  2%|▏         | 35/2000 [00:43<36:57,  1.13s/it]

Training_loss 29.42813


  2%|▏         | 36/2000 [00:44<36:17,  1.11s/it]

Training_loss 29.02199


  2%|▏         | 37/2000 [00:45<34:18,  1.05s/it]

Training_loss 28.15070


  2%|▏         | 38/2000 [00:46<33:39,  1.03s/it]

Training_loss 27.81230


  2%|▏         | 39/2000 [00:47<33:34,  1.03s/it]

Training_loss 27.39240


  2%|▏         | 40/2000 [00:48<32:55,  1.01s/it]

Training_loss 26.97870


  2%|▏         | 41/2000 [00:49<31:38,  1.03it/s]

Training_loss 26.70310


  2%|▏         | 42/2000 [00:50<32:15,  1.01it/s]

Training_loss 26.19393


  2%|▏         | 43/2000 [00:51<31:50,  1.02it/s]

Training_loss 25.49176


  2%|▏         | 44/2000 [00:52<32:10,  1.01it/s]

Training_loss 25.17268


  2%|▏         | 45/2000 [00:53<35:17,  1.08s/it]

Training_loss 24.88421


  2%|▏         | 46/2000 [00:54<34:28,  1.06s/it]

Training_loss 24.40024


  2%|▏         | 47/2000 [00:55<34:27,  1.06s/it]

Training_loss 23.79118


  2%|▏         | 48/2000 [00:56<33:21,  1.03s/it]

Training_loss 23.23015


  2%|▏         | 49/2000 [00:57<33:47,  1.04s/it]

Training_loss 22.99021


  2%|▎         | 50/2000 [00:58<33:02,  1.02s/it]

Training_loss 22.96631


  3%|▎         | 51/2000 [00:59<33:30,  1.03s/it]

Training_loss 22.74363


  3%|▎         | 52/2000 [01:00<33:39,  1.04s/it]

Training_loss 22.27370


  3%|▎         | 53/2000 [01:02<34:25,  1.06s/it]

Training_loss 22.13000


  3%|▎         | 54/2000 [01:03<33:29,  1.03s/it]

Training_loss 22.05089


  3%|▎         | 55/2000 [01:03<32:10,  1.01it/s]

Training_loss 21.94996


  3%|▎         | 56/2000 [01:04<32:00,  1.01it/s]

Training_loss 21.79865


  3%|▎         | 57/2000 [01:05<31:56,  1.01it/s]

Training_loss 21.42966


  3%|▎         | 58/2000 [01:06<31:28,  1.03it/s]

Training_loss 21.24175


  3%|▎         | 59/2000 [01:07<31:21,  1.03it/s]

Training_loss 21.01788


  3%|▎         | 60/2000 [01:08<31:41,  1.02it/s]

Training_loss 20.89865


  3%|▎         | 61/2000 [01:10<34:34,  1.07s/it]

Training_loss 20.84496


  3%|▎         | 62/2000 [01:11<34:21,  1.06s/it]

Training_loss 20.71995


  3%|▎         | 63/2000 [01:12<33:13,  1.03s/it]

Training_loss 20.70376


  3%|▎         | 64/2000 [01:13<32:54,  1.02s/it]

Training_loss 20.73514


  3%|▎         | 65/2000 [01:14<31:35,  1.02it/s]

Training_loss 20.64732


  3%|▎         | 66/2000 [01:15<32:24,  1.01s/it]

Training_loss 20.57596


  3%|▎         | 67/2000 [01:15<31:32,  1.02it/s]

Training_loss 20.61067


  3%|▎         | 68/2000 [01:17<31:58,  1.01it/s]

Training_loss 20.40466


  3%|▎         | 69/2000 [01:18<31:56,  1.01it/s]

Training_loss 20.07716


  4%|▎         | 70/2000 [01:18<30:58,  1.04it/s]

Training_loss 20.02940


  4%|▎         | 71/2000 [01:19<31:47,  1.01it/s]

Training_loss 20.15565


  4%|▎         | 72/2000 [01:20<31:04,  1.03it/s]

Training_loss 20.06128


  4%|▎         | 73/2000 [01:21<31:18,  1.03it/s]

Training_loss 19.86194


  4%|▎         | 74/2000 [01:22<30:46,  1.04it/s]

Training_loss 19.91672


  4%|▍         | 75/2000 [01:23<30:13,  1.06it/s]

Training_loss 19.89018


  4%|▍         | 76/2000 [01:24<31:04,  1.03it/s]

Training_loss 19.70787


  4%|▍         | 77/2000 [01:25<34:01,  1.06s/it]

Training_loss 19.66522


  4%|▍         | 78/2000 [01:27<33:30,  1.05s/it]

Training_loss 19.84889


  4%|▍         | 79/2000 [01:27<32:34,  1.02s/it]

Training_loss 19.75997


  4%|▍         | 80/2000 [01:29<32:50,  1.03s/it]

Training_loss 19.70188


  4%|▍         | 81/2000 [01:30<32:42,  1.02s/it]

Training_loss 19.71702


  4%|▍         | 82/2000 [01:31<32:45,  1.02s/it]

Training_loss 19.68969


  4%|▍         | 83/2000 [01:31<31:13,  1.02it/s]

Training_loss 19.71519


  4%|▍         | 84/2000 [01:32<31:15,  1.02it/s]

Training_loss 19.69145


  4%|▍         | 85/2000 [01:33<30:15,  1.05it/s]

Training_loss 19.66417


  4%|▍         | 86/2000 [01:34<30:55,  1.03it/s]

Training_loss 19.63202


  4%|▍         | 87/2000 [01:35<30:27,  1.05it/s]

Training_loss 19.61932


  4%|▍         | 88/2000 [01:36<30:32,  1.04it/s]

Training_loss 19.51017


  4%|▍         | 89/2000 [01:37<30:46,  1.03it/s]

Training_loss 19.53803


  4%|▍         | 90/2000 [01:38<31:03,  1.02it/s]

Training_loss 19.51461


  5%|▍         | 91/2000 [01:39<30:58,  1.03it/s]

Training_loss 19.31933


  5%|▍         | 92/2000 [01:40<31:38,  1.01it/s]

Training_loss 19.21011


  5%|▍         | 93/2000 [01:41<33:55,  1.07s/it]

Training_loss 19.25336


  5%|▍         | 94/2000 [01:43<35:36,  1.12s/it]

Training_loss 19.35746


  5%|▍         | 95/2000 [01:44<36:25,  1.15s/it]

Training_loss 19.31646


  5%|▍         | 96/2000 [01:45<36:47,  1.16s/it]

Training_loss 19.24786


  5%|▍         | 97/2000 [01:46<35:37,  1.12s/it]

Training_loss 19.21149


  5%|▍         | 98/2000 [01:47<33:24,  1.05s/it]

Training_loss 19.09000


  5%|▍         | 99/2000 [01:48<32:22,  1.02s/it]

Training_loss 19.07940


  5%|▌         | 100/2000 [01:49<31:46,  1.00s/it]

Training_loss 19.20902


  5%|▌         | 101/2000 [01:50<31:04,  1.02it/s]

Training_loss 19.42590


  5%|▌         | 102/2000 [01:51<30:27,  1.04it/s]

Training_loss 19.35591


  5%|▌         | 103/2000 [01:52<30:03,  1.05it/s]

Training_loss 19.29451


  5%|▌         | 104/2000 [01:53<30:30,  1.04it/s]

Training_loss 19.21525


  5%|▌         | 105/2000 [01:54<32:33,  1.03s/it]

Training_loss 18.98871


  5%|▌         | 106/2000 [01:55<33:08,  1.05s/it]

Training_loss 19.15094


  5%|▌         | 107/2000 [01:56<32:09,  1.02s/it]

Training_loss 19.14891


  5%|▌         | 108/2000 [01:57<35:27,  1.12s/it]

Training_loss 19.17562


  5%|▌         | 109/2000 [01:59<37:01,  1.17s/it]

Training_loss 19.28702


  6%|▌         | 110/2000 [02:00<37:33,  1.19s/it]

Training_loss 19.40129


  6%|▌         | 111/2000 [02:01<36:57,  1.17s/it]

Training_loss 19.44909


  6%|▌         | 112/2000 [02:02<36:48,  1.17s/it]

Training_loss 19.39413


  6%|▌         | 113/2000 [02:03<35:00,  1.11s/it]

Training_loss 19.38458


  6%|▌         | 114/2000 [02:04<35:09,  1.12s/it]

Training_loss 19.19467


  6%|▌         | 115/2000 [02:05<35:18,  1.12s/it]

Training_loss 19.21490


  6%|▌         | 116/2000 [02:07<36:26,  1.16s/it]

Training_loss 19.19475


  6%|▌         | 117/2000 [02:08<37:22,  1.19s/it]

Training_loss 19.07320


  6%|▌         | 118/2000 [02:09<36:06,  1.15s/it]

Training_loss 19.03904


  6%|▌         | 119/2000 [02:10<34:01,  1.09s/it]

Training_loss 19.07737


  6%|▌         | 120/2000 [02:11<33:02,  1.05s/it]

Training_loss 19.01376


  6%|▌         | 121/2000 [02:12<31:50,  1.02s/it]

Training_loss 18.90911


  6%|▌         | 122/2000 [02:13<34:08,  1.09s/it]

Training_loss 18.81926


  6%|▌         | 123/2000 [02:14<33:42,  1.08s/it]

Training_loss 18.79671


  6%|▌         | 124/2000 [02:15<32:38,  1.04s/it]

Training_loss 18.84997


  6%|▋         | 125/2000 [02:16<33:50,  1.08s/it]

Training_loss 18.94784


  6%|▋         | 126/2000 [02:17<33:21,  1.07s/it]

Training_loss 18.77489


  6%|▋         | 127/2000 [02:18<34:10,  1.09s/it]

Training_loss 18.57725


  6%|▋         | 128/2000 [02:19<33:52,  1.09s/it]

Training_loss 18.46681


  6%|▋         | 129/2000 [02:20<32:17,  1.04s/it]

Training_loss 18.43700


  6%|▋         | 130/2000 [02:21<32:10,  1.03s/it]

Training_loss 18.45081


  7%|▋         | 131/2000 [02:22<31:29,  1.01s/it]

Training_loss 18.40970


  7%|▋         | 132/2000 [02:23<30:36,  1.02it/s]

Training_loss 18.34780


  7%|▋         | 133/2000 [02:24<30:21,  1.03it/s]

Training_loss 18.38983


  7%|▋         | 134/2000 [02:25<29:49,  1.04it/s]

Training_loss 18.28335


  7%|▋         | 135/2000 [02:26<29:26,  1.06it/s]

Training_loss 18.14175


  7%|▋         | 136/2000 [02:27<29:45,  1.04it/s]

Training_loss 18.16319


  7%|▋         | 137/2000 [02:28<30:57,  1.00it/s]

Training_loss 18.15905


  7%|▋         | 138/2000 [02:29<33:30,  1.08s/it]

Training_loss 18.04301


  7%|▋         | 139/2000 [02:30<33:08,  1.07s/it]

Training_loss 18.08220


  7%|▋         | 140/2000 [02:31<31:48,  1.03s/it]

Training_loss 18.03196


  7%|▋         | 141/2000 [02:32<31:30,  1.02s/it]

Training_loss 18.21804


  7%|▋         | 142/2000 [02:33<31:31,  1.02s/it]

Training_loss 18.23882


  7%|▋         | 143/2000 [02:34<31:35,  1.02s/it]

Training_loss 18.31040


  7%|▋         | 144/2000 [02:35<31:01,  1.00s/it]

Training_loss 18.35750


  7%|▋         | 145/2000 [02:36<29:49,  1.04it/s]

Training_loss 18.33695


  7%|▋         | 146/2000 [02:37<30:24,  1.02it/s]

Training_loss 18.24354


  7%|▋         | 147/2000 [02:38<29:45,  1.04it/s]

Training_loss 18.37957


  7%|▋         | 148/2000 [02:39<30:23,  1.02it/s]

Training_loss 18.16536


  7%|▋         | 149/2000 [02:40<30:04,  1.03it/s]

Training_loss 18.13292


  8%|▊         | 150/2000 [02:41<30:09,  1.02it/s]

Training_loss 17.93207


  8%|▊         | 151/2000 [02:42<30:21,  1.02it/s]

Training_loss 17.92306


  8%|▊         | 152/2000 [02:43<29:47,  1.03it/s]

Training_loss 17.58497


  8%|▊         | 153/2000 [02:44<29:41,  1.04it/s]

Training_loss 17.67013


  8%|▊         | 154/2000 [02:45<32:52,  1.07s/it]

Training_loss 17.79133


  8%|▊         | 155/2000 [02:46<31:52,  1.04s/it]

Training_loss 17.61625


  8%|▊         | 156/2000 [02:47<31:49,  1.04s/it]

Training_loss 17.52185


  8%|▊         | 157/2000 [02:48<30:09,  1.02it/s]

Training_loss 17.45868


  8%|▊         | 158/2000 [02:49<30:05,  1.02it/s]

Training_loss 17.40607


  8%|▊         | 159/2000 [02:50<29:17,  1.05it/s]

Training_loss 17.24897


  8%|▊         | 160/2000 [02:51<29:13,  1.05it/s]

Training_loss 17.11626


  8%|▊         | 161/2000 [02:52<31:19,  1.02s/it]

Training_loss 17.00013


  8%|▊         | 162/2000 [02:53<31:23,  1.02s/it]

Training_loss 17.05456


  8%|▊         | 163/2000 [02:54<31:37,  1.03s/it]

Training_loss 16.77790


  8%|▊         | 164/2000 [02:55<30:29,  1.00it/s]

Training_loss 16.90599


  8%|▊         | 165/2000 [02:56<30:17,  1.01it/s]

Training_loss 16.94506


  8%|▊         | 166/2000 [02:57<30:03,  1.02it/s]

Training_loss 17.02931


  8%|▊         | 167/2000 [02:58<29:37,  1.03it/s]

Training_loss 17.09761


  8%|▊         | 168/2000 [02:59<30:20,  1.01it/s]

Training_loss 17.29779


  8%|▊         | 169/2000 [03:00<31:22,  1.03s/it]

Training_loss 17.18977


  8%|▊         | 170/2000 [03:02<33:44,  1.11s/it]

Training_loss 16.96719


  9%|▊         | 171/2000 [03:03<32:25,  1.06s/it]

Training_loss 16.95995


  9%|▊         | 172/2000 [03:04<32:13,  1.06s/it]

Training_loss 16.90629


  9%|▊         | 173/2000 [03:04<30:51,  1.01s/it]

Training_loss 17.04056


  9%|▊         | 174/2000 [03:05<29:49,  1.02it/s]

Training_loss 17.18495


  9%|▉         | 175/2000 [03:06<30:11,  1.01it/s]

Training_loss 17.29331


  9%|▉         | 176/2000 [03:07<29:28,  1.03it/s]

Training_loss 17.39470


  9%|▉         | 177/2000 [03:08<28:58,  1.05it/s]

Training_loss 17.40963


  9%|▉         | 178/2000 [03:09<28:45,  1.06it/s]

Training_loss 17.34661


  9%|▉         | 179/2000 [03:10<28:29,  1.07it/s]

Training_loss 17.38626


  9%|▉         | 180/2000 [03:11<28:22,  1.07it/s]

Training_loss 17.46909


  9%|▉         | 181/2000 [03:12<29:56,  1.01it/s]

Training_loss 17.21755


  9%|▉         | 182/2000 [03:14<34:27,  1.14s/it]

Training_loss 17.00419


  9%|▉         | 183/2000 [03:15<35:10,  1.16s/it]

Training_loss 17.10523


  9%|▉         | 184/2000 [03:16<33:40,  1.11s/it]

Training_loss 17.13617


  9%|▉         | 185/2000 [03:17<34:37,  1.14s/it]

Training_loss 17.33364


  9%|▉         | 186/2000 [03:18<34:10,  1.13s/it]

Training_loss 17.39118


  9%|▉         | 187/2000 [03:19<33:03,  1.09s/it]

Training_loss 17.48127


  9%|▉         | 188/2000 [03:20<32:21,  1.07s/it]

Training_loss 17.36697


  9%|▉         | 189/2000 [03:21<32:14,  1.07s/it]

Training_loss 17.35546


 10%|▉         | 190/2000 [03:22<31:07,  1.03s/it]

Training_loss 17.44789


 10%|▉         | 191/2000 [03:23<30:22,  1.01s/it]

Training_loss 17.31610


 10%|▉         | 192/2000 [03:24<30:08,  1.00s/it]

Training_loss 17.29792


 10%|▉         | 193/2000 [03:25<29:52,  1.01it/s]

Training_loss 17.66150


 10%|▉         | 194/2000 [03:26<29:29,  1.02it/s]

Training_loss 17.56633


 10%|▉         | 195/2000 [03:27<29:21,  1.02it/s]

Training_loss 17.32620


 10%|▉         | 196/2000 [03:28<29:19,  1.03it/s]

Training_loss 17.29655


 10%|▉         | 197/2000 [03:29<29:05,  1.03it/s]

Training_loss 17.26668


 10%|▉         | 198/2000 [03:30<28:54,  1.04it/s]

Training_loss 17.41804


 10%|▉         | 199/2000 [03:31<28:27,  1.05it/s]

Training_loss 17.67491


 10%|█         | 200/2000 [03:32<28:01,  1.07it/s]

Training_loss 17.54041


 10%|█         | 201/2000 [03:33<30:01,  1.00s/it]

Training_loss 17.42762


 10%|█         | 202/2000 [03:34<32:47,  1.09s/it]

Training_loss 17.27577


 10%|█         | 203/2000 [03:35<32:11,  1.07s/it]

Training_loss 17.28731


 10%|█         | 204/2000 [03:36<31:43,  1.06s/it]

Training_loss 17.37576


 10%|█         | 205/2000 [03:37<30:31,  1.02s/it]

Training_loss 17.33716


 10%|█         | 206/2000 [03:38<29:47,  1.00it/s]

Training_loss 17.32183


 10%|█         | 207/2000 [03:39<28:57,  1.03it/s]

Training_loss 17.28497


 10%|█         | 208/2000 [03:40<28:41,  1.04it/s]

Training_loss 17.43576


 10%|█         | 209/2000 [03:41<27:54,  1.07it/s]

Training_loss 17.41816


 10%|█         | 210/2000 [03:42<27:45,  1.07it/s]

Training_loss 17.56159


 11%|█         | 211/2000 [03:43<29:15,  1.02it/s]

Training_loss 17.62010


 11%|█         | 212/2000 [03:44<31:46,  1.07s/it]

Training_loss 17.76278


 11%|█         | 213/2000 [03:45<32:08,  1.08s/it]

Training_loss 17.90304


 11%|█         | 214/2000 [03:46<31:02,  1.04s/it]

Training_loss 18.08973


 11%|█         | 215/2000 [03:47<30:20,  1.02s/it]

Training_loss 18.00482


 11%|█         | 216/2000 [03:48<32:44,  1.10s/it]

Training_loss 18.02847


 11%|█         | 217/2000 [03:50<34:55,  1.18s/it]

Training_loss 17.99276


 11%|█         | 218/2000 [03:51<32:38,  1.10s/it]

Training_loss 17.89582


 11%|█         | 219/2000 [03:52<31:35,  1.06s/it]

Training_loss 17.88360


 11%|█         | 220/2000 [03:53<29:57,  1.01s/it]

Training_loss 17.79817


 11%|█         | 221/2000 [03:54<29:39,  1.00s/it]

Training_loss 17.86134


 11%|█         | 222/2000 [03:54<29:18,  1.01it/s]

Training_loss 17.80797


 11%|█         | 223/2000 [03:55<28:16,  1.05it/s]

Training_loss 17.72272


 11%|█         | 224/2000 [03:56<29:00,  1.02it/s]

Training_loss 17.77449


 11%|█▏        | 225/2000 [03:57<29:14,  1.01it/s]

Training_loss 17.80983


 11%|█▏        | 226/2000 [03:58<29:03,  1.02it/s]

Training_loss 17.87791


 11%|█▏        | 227/2000 [03:59<29:20,  1.01it/s]

Training_loss 17.68858


 11%|█▏        | 228/2000 [04:00<30:16,  1.03s/it]

Training_loss 17.81606


 11%|█▏        | 229/2000 [04:01<30:10,  1.02s/it]

Training_loss 17.73345


 12%|█▏        | 230/2000 [04:02<29:54,  1.01s/it]

Training_loss 17.71804


 12%|█▏        | 231/2000 [04:03<29:40,  1.01s/it]

Training_loss 17.95878


 12%|█▏        | 232/2000 [04:05<30:10,  1.02s/it]

Training_loss 18.02689


 12%|█▏        | 233/2000 [04:06<31:55,  1.08s/it]

Training_loss 18.00636


 12%|█▏        | 234/2000 [04:07<31:37,  1.07s/it]

Training_loss 17.84257


 12%|█▏        | 235/2000 [04:08<30:45,  1.05s/it]

Training_loss 17.93750


 12%|█▏        | 236/2000 [04:09<30:14,  1.03s/it]

Training_loss 17.88598


 12%|█▏        | 237/2000 [04:10<29:07,  1.01it/s]

Training_loss 17.75738


 12%|█▏        | 238/2000 [04:11<29:41,  1.01s/it]

Training_loss 17.66721


 12%|█▏        | 239/2000 [04:12<29:04,  1.01it/s]

Training_loss 17.53839


 12%|█▏        | 240/2000 [04:13<29:20,  1.00s/it]

Training_loss 17.58695


 12%|█▏        | 241/2000 [04:14<28:49,  1.02it/s]

Training_loss 17.80855


 12%|█▏        | 242/2000 [04:15<29:29,  1.01s/it]

Training_loss 17.73813


 12%|█▏        | 243/2000 [04:16<29:55,  1.02s/it]

Training_loss 17.65489


 12%|█▏        | 244/2000 [04:17<29:47,  1.02s/it]

Training_loss 17.94398


 12%|█▏        | 245/2000 [04:18<29:08,  1.00it/s]

Training_loss 17.89610


 12%|█▏        | 246/2000 [04:19<28:38,  1.02it/s]

Training_loss 18.04804


 12%|█▏        | 247/2000 [04:20<27:59,  1.04it/s]

Training_loss 18.01357


 12%|█▏        | 248/2000 [04:21<29:35,  1.01s/it]

Training_loss 17.98191


 12%|█▏        | 249/2000 [04:22<30:55,  1.06s/it]

Training_loss 17.76595


 12%|█▎        | 250/2000 [04:23<30:12,  1.04s/it]

Training_loss 17.97902


 13%|█▎        | 251/2000 [04:24<29:24,  1.01s/it]

Training_loss 17.78479


 13%|█▎        | 252/2000 [04:25<29:06,  1.00it/s]

Training_loss 17.80113


 13%|█▎        | 253/2000 [04:26<29:05,  1.00it/s]

Training_loss 17.55615


 13%|█▎        | 254/2000 [04:27<29:15,  1.01s/it]

Training_loss 17.77791


 13%|█▎        | 255/2000 [04:28<28:44,  1.01it/s]

Training_loss 17.71692


 13%|█▎        | 256/2000 [04:29<28:48,  1.01it/s]

Training_loss 17.76436


 13%|█▎        | 257/2000 [04:30<28:50,  1.01it/s]

Training_loss 17.78037


 13%|█▎        | 258/2000 [04:31<27:58,  1.04it/s]

Training_loss 17.99737


 13%|█▎        | 259/2000 [04:32<28:16,  1.03it/s]

Training_loss 18.08337


 13%|█▎        | 260/2000 [04:33<28:44,  1.01it/s]

Training_loss 18.14132


 13%|█▎        | 261/2000 [04:34<28:50,  1.00it/s]

Training_loss 18.09454


 13%|█▎        | 262/2000 [04:35<29:04,  1.00s/it]

Training_loss 18.15974


 13%|█▎        | 263/2000 [04:36<29:05,  1.00s/it]

Training_loss 18.16353


 13%|█▎        | 264/2000 [04:37<31:38,  1.09s/it]

Training_loss 18.23503


 13%|█▎        | 265/2000 [04:38<31:41,  1.10s/it]

Training_loss 18.14514


 13%|█▎        | 266/2000 [04:39<30:11,  1.04s/it]

Training_loss 18.06022


 13%|█▎        | 267/2000 [04:40<29:50,  1.03s/it]

Training_loss 18.10526


 13%|█▎        | 268/2000 [04:41<29:44,  1.03s/it]

Training_loss 18.16762


 13%|█▎        | 269/2000 [04:42<32:16,  1.12s/it]

Training_loss 18.20450


 14%|█▎        | 270/2000 [04:44<34:59,  1.21s/it]

Training_loss 18.30278


 14%|█▎        | 271/2000 [04:45<33:59,  1.18s/it]

Training_loss 18.35299


 14%|█▎        | 272/2000 [04:46<31:47,  1.10s/it]

Training_loss 18.51572


 14%|█▎        | 273/2000 [04:47<30:32,  1.06s/it]

Training_loss 18.66735


 14%|█▎        | 274/2000 [04:48<30:30,  1.06s/it]

Training_loss 18.52991


 14%|█▍        | 275/2000 [04:49<29:16,  1.02s/it]

Training_loss 18.24260


 14%|█▍        | 276/2000 [04:50<28:52,  1.00s/it]

Training_loss 18.61764


 14%|█▍        | 277/2000 [04:51<28:24,  1.01it/s]

Training_loss 18.71488


 14%|█▍        | 278/2000 [04:52<28:29,  1.01it/s]

Training_loss 18.47978


 14%|█▍        | 279/2000 [04:53<29:36,  1.03s/it]

Training_loss 18.44006


 14%|█▍        | 280/2000 [04:54<30:33,  1.07s/it]

Training_loss 18.61781


 14%|█▍        | 281/2000 [04:55<30:03,  1.05s/it]

Training_loss 18.62521


 14%|█▍        | 282/2000 [04:56<29:12,  1.02s/it]

Training_loss 18.51729


 14%|█▍        | 283/2000 [04:57<28:27,  1.01it/s]

Training_loss 18.43879


 14%|█▍        | 284/2000 [04:58<28:25,  1.01it/s]

Training_loss 18.37910


 14%|█▍        | 285/2000 [04:59<28:07,  1.02it/s]

Training_loss 18.21521


 14%|█▍        | 286/2000 [05:00<28:51,  1.01s/it]

Training_loss 18.21683


 14%|█▍        | 287/2000 [05:01<28:27,  1.00it/s]

Training_loss 18.42045


 14%|█▍        | 288/2000 [05:02<28:05,  1.02it/s]

Training_loss 18.59571


 14%|█▍        | 289/2000 [05:03<27:48,  1.03it/s]

Training_loss 18.67215


 14%|█▍        | 290/2000 [05:04<27:08,  1.05it/s]

Training_loss 18.47088


 15%|█▍        | 291/2000 [05:05<26:45,  1.06it/s]

Training_loss 18.30576


 15%|█▍        | 292/2000 [05:06<27:09,  1.05it/s]

Training_loss 18.35169


 15%|█▍        | 293/2000 [05:07<26:46,  1.06it/s]

Training_loss 18.43360


 15%|█▍        | 294/2000 [05:07<26:45,  1.06it/s]

Training_loss 18.46043


 15%|█▍        | 295/2000 [05:09<27:43,  1.03it/s]

Training_loss 18.49732


 15%|█▍        | 296/2000 [05:10<29:08,  1.03s/it]

Training_loss 18.56598


 15%|█▍        | 297/2000 [05:11<28:25,  1.00s/it]

Training_loss 18.58094


 15%|█▍        | 298/2000 [05:12<28:07,  1.01it/s]

Training_loss 18.44152


 15%|█▍        | 299/2000 [05:13<27:44,  1.02it/s]

Training_loss 18.36324


 15%|█▌        | 300/2000 [05:13<27:46,  1.02it/s]

Training_loss 18.34961


 15%|█▌        | 301/2000 [05:14<27:04,  1.05it/s]

Training_loss 18.08156


 15%|█▌        | 302/2000 [05:15<27:08,  1.04it/s]

Training_loss 18.16788


 15%|█▌        | 303/2000 [05:16<27:24,  1.03it/s]

Training_loss 18.04650


 15%|█▌        | 304/2000 [05:17<27:19,  1.03it/s]

Training_loss 18.10025


 15%|█▌        | 305/2000 [05:18<27:37,  1.02it/s]

Training_loss 18.13451


 15%|█▌        | 306/2000 [05:19<27:22,  1.03it/s]

Training_loss 18.05825


 15%|█▌        | 307/2000 [05:20<28:08,  1.00it/s]

Training_loss 18.17970


 15%|█▌        | 308/2000 [05:21<27:42,  1.02it/s]

Training_loss 18.03344


 15%|█▌        | 309/2000 [05:22<27:56,  1.01it/s]

Training_loss 18.34496


 16%|█▌        | 310/2000 [05:23<28:12,  1.00s/it]

Training_loss 18.43413


 16%|█▌        | 311/2000 [05:24<29:24,  1.04s/it]

Training_loss 18.40772


 16%|█▌        | 312/2000 [05:26<31:09,  1.11s/it]

Training_loss 18.47838


 16%|█▌        | 313/2000 [05:27<30:15,  1.08s/it]

Training_loss 18.48005


 16%|█▌        | 314/2000 [05:28<29:38,  1.06s/it]

Training_loss 18.31871


 16%|█▌        | 315/2000 [05:29<28:40,  1.02s/it]

Training_loss 18.53025


 16%|█▌        | 316/2000 [05:30<28:43,  1.02s/it]

Training_loss 18.67699


 16%|█▌        | 317/2000 [05:31<29:30,  1.05s/it]

Training_loss 18.66541


 16%|█▌        | 318/2000 [05:32<30:22,  1.08s/it]

Training_loss 18.47421


 16%|█▌        | 319/2000 [05:33<30:05,  1.07s/it]

Training_loss 18.60832


 16%|█▌        | 320/2000 [05:34<29:38,  1.06s/it]

Training_loss 18.79123


 16%|█▌        | 321/2000 [05:35<28:55,  1.03s/it]

Training_loss 18.89242


 16%|█▌        | 322/2000 [05:36<29:04,  1.04s/it]

Training_loss 19.10278


 16%|█▌        | 323/2000 [05:37<28:36,  1.02s/it]

Training_loss 19.23746


 16%|█▌        | 324/2000 [05:38<28:06,  1.01s/it]

Training_loss 19.31340


 16%|█▋        | 325/2000 [05:39<27:27,  1.02it/s]

Training_loss 19.16676


 16%|█▋        | 326/2000 [05:40<27:17,  1.02it/s]

Training_loss 18.99803


 16%|█▋        | 327/2000 [05:41<29:27,  1.06s/it]

Training_loss 18.90655


 16%|█▋        | 328/2000 [05:42<31:01,  1.11s/it]

Training_loss 19.08503


 16%|█▋        | 329/2000 [05:44<32:48,  1.18s/it]

Training_loss 19.16585


 16%|█▋        | 330/2000 [05:45<34:59,  1.26s/it]

Training_loss 19.11270


 17%|█▋        | 331/2000 [05:46<34:07,  1.23s/it]

Training_loss 19.02385


 17%|█▋        | 332/2000 [05:48<33:56,  1.22s/it]

Training_loss 18.91751


 17%|█▋        | 333/2000 [05:49<32:43,  1.18s/it]

Training_loss 18.85500


 17%|█▋        | 334/2000 [05:50<30:41,  1.11s/it]

Training_loss 18.82433


 17%|█▋        | 335/2000 [05:51<29:32,  1.06s/it]

Training_loss 18.71436


 17%|█▋        | 336/2000 [05:51<28:39,  1.03s/it]

Training_loss 18.61282


 17%|█▋        | 337/2000 [05:52<28:05,  1.01s/it]

Training_loss 18.51421


 17%|█▋        | 338/2000 [05:53<27:36,  1.00it/s]

Training_loss 18.83020


 17%|█▋        | 339/2000 [05:54<26:11,  1.06it/s]

Training_loss 18.92365


 17%|█▋        | 340/2000 [05:55<25:26,  1.09it/s]

Training_loss 18.85648


 17%|█▋        | 341/2000 [05:56<24:30,  1.13it/s]

Training_loss 18.75439


 17%|█▋        | 342/2000 [05:57<23:58,  1.15it/s]

Training_loss 18.89262


 17%|█▋        | 343/2000 [05:58<25:01,  1.10it/s]

Training_loss 18.92350


 17%|█▋        | 344/2000 [05:59<24:45,  1.12it/s]

Training_loss 19.12058


 17%|█▋        | 345/2000 [05:59<24:36,  1.12it/s]

Training_loss 19.08009


 17%|█▋        | 346/2000 [06:00<24:14,  1.14it/s]

Training_loss 19.20488


 17%|█▋        | 347/2000 [06:01<25:49,  1.07it/s]

Training_loss 19.23672


 17%|█▋        | 348/2000 [06:02<27:09,  1.01it/s]

Training_loss 19.07341


 17%|█▋        | 349/2000 [06:03<25:51,  1.06it/s]

Training_loss 19.04250


 18%|█▊        | 350/2000 [06:04<25:52,  1.06it/s]

Training_loss 19.10446


 18%|█▊        | 351/2000 [06:05<26:49,  1.02it/s]

Training_loss 19.04825


 18%|█▊        | 352/2000 [06:06<25:52,  1.06it/s]

Training_loss 19.21339


 18%|█▊        | 353/2000 [06:07<24:20,  1.13it/s]

Training_loss 19.27459


 18%|█▊        | 354/2000 [06:08<25:20,  1.08it/s]

Training_loss 19.61048


 18%|█▊        | 355/2000 [06:09<27:51,  1.02s/it]

Training_loss 19.48510


 18%|█▊        | 356/2000 [06:10<26:14,  1.04it/s]

Training_loss 19.40458


 18%|█▊        | 357/2000 [06:11<25:47,  1.06it/s]

Training_loss 19.34939


 18%|█▊        | 358/2000 [06:12<25:24,  1.08it/s]

Training_loss 18.79554


 18%|█▊        | 359/2000 [06:13<24:56,  1.10it/s]

Training_loss 19.20301


 18%|█▊        | 360/2000 [06:14<25:30,  1.07it/s]

Training_loss 19.72752


 18%|█▊        | 361/2000 [06:15<25:44,  1.06it/s]

Training_loss 19.62297


 18%|█▊        | 362/2000 [06:16<25:30,  1.07it/s]

Training_loss 19.34026


 18%|█▊        | 363/2000 [06:16<24:50,  1.10it/s]

Training_loss 19.54782


 18%|█▊        | 364/2000 [06:17<25:24,  1.07it/s]

Training_loss 19.61443


 18%|█▊        | 365/2000 [06:18<25:23,  1.07it/s]

Training_loss 19.58029


 18%|█▊        | 366/2000 [06:19<26:03,  1.05it/s]

Training_loss 19.59227


 18%|█▊        | 367/2000 [06:20<24:37,  1.10it/s]

Training_loss 19.61540


 18%|█▊        | 368/2000 [06:21<24:43,  1.10it/s]

Training_loss 19.27680


 18%|█▊        | 369/2000 [06:22<23:20,  1.16it/s]

Training_loss 19.29458


 18%|█▊        | 370/2000 [06:23<25:19,  1.07it/s]

Training_loss 19.31713


 19%|█▊        | 371/2000 [06:24<25:51,  1.05it/s]

Training_loss 19.29061


 19%|█▊        | 372/2000 [06:25<24:00,  1.13it/s]

Training_loss 19.61983


 19%|█▊        | 373/2000 [06:25<23:54,  1.13it/s]

Training_loss 19.56030


 19%|█▊        | 374/2000 [06:26<22:47,  1.19it/s]

Training_loss 19.48640


 19%|█▉        | 375/2000 [06:27<23:23,  1.16it/s]

Training_loss 19.52450


 19%|█▉        | 376/2000 [06:28<24:33,  1.10it/s]

Training_loss 19.60127


 19%|█▉        | 377/2000 [06:29<25:00,  1.08it/s]

Training_loss 19.49934


 19%|█▉        | 378/2000 [06:30<24:05,  1.12it/s]

Training_loss 19.47191


 19%|█▉        | 379/2000 [06:31<25:28,  1.06it/s]

Training_loss 19.64227


 19%|█▉        | 380/2000 [06:32<24:18,  1.11it/s]

Training_loss 19.70127


 19%|█▉        | 381/2000 [06:33<24:15,  1.11it/s]

Training_loss 19.99783


 19%|█▉        | 382/2000 [06:33<23:23,  1.15it/s]

Training_loss 19.80106


 19%|█▉        | 383/2000 [06:34<23:01,  1.17it/s]

Training_loss 20.05996


 19%|█▉        | 384/2000 [06:35<22:58,  1.17it/s]

Training_loss 19.78150


 19%|█▉        | 385/2000 [06:36<22:12,  1.21it/s]

Training_loss 19.98317


 19%|█▉        | 386/2000 [06:37<22:35,  1.19it/s]

Training_loss 20.31532


 19%|█▉        | 387/2000 [06:38<22:40,  1.19it/s]

Training_loss 20.35626


 19%|█▉        | 388/2000 [06:39<22:52,  1.17it/s]

Training_loss 20.13639


 19%|█▉        | 389/2000 [06:39<23:20,  1.15it/s]

Training_loss 20.14855


 20%|█▉        | 390/2000 [06:40<22:29,  1.19it/s]

Training_loss 20.00340


 20%|█▉        | 391/2000 [06:41<24:00,  1.12it/s]

Training_loss 19.73720


 20%|█▉        | 392/2000 [06:42<24:48,  1.08it/s]

Training_loss 19.83940


 20%|█▉        | 393/2000 [06:43<24:38,  1.09it/s]

Training_loss 19.72452


 20%|█▉        | 394/2000 [06:44<25:42,  1.04it/s]

Training_loss 19.88871


 20%|█▉        | 395/2000 [06:45<23:58,  1.12it/s]

Training_loss 20.05504


 20%|█▉        | 396/2000 [06:46<24:54,  1.07it/s]

Training_loss 19.86286


 20%|█▉        | 397/2000 [06:47<23:20,  1.14it/s]

Training_loss 19.69008


 20%|█▉        | 398/2000 [06:48<23:22,  1.14it/s]

Training_loss 19.67981


 20%|█▉        | 399/2000 [06:48<23:03,  1.16it/s]

Training_loss 19.69153


 20%|██        | 400/2000 [06:49<22:24,  1.19it/s]

Training_loss 19.51508


 20%|██        | 401/2000 [06:50<22:24,  1.19it/s]

Training_loss 19.56568


 20%|██        | 402/2000 [06:51<23:51,  1.12it/s]

Training_loss 19.57464


 20%|██        | 403/2000 [06:52<23:40,  1.12it/s]

Training_loss 19.73476


 20%|██        | 404/2000 [06:53<22:39,  1.17it/s]

Training_loss 19.49245


 20%|██        | 405/2000 [06:54<23:23,  1.14it/s]

Training_loss 19.62626


 20%|██        | 406/2000 [06:55<23:25,  1.13it/s]

Training_loss 19.69131


 20%|██        | 407/2000 [06:56<25:13,  1.05it/s]

Training_loss 19.66969


 20%|██        | 408/2000 [06:57<24:39,  1.08it/s]

Training_loss 19.57636


 20%|██        | 409/2000 [06:57<23:46,  1.12it/s]

Training_loss 19.68101


 20%|██        | 410/2000 [06:58<23:05,  1.15it/s]

Training_loss 19.80727


 21%|██        | 411/2000 [06:59<22:03,  1.20it/s]

Training_loss 19.70603


 21%|██        | 412/2000 [07:00<21:45,  1.22it/s]

Training_loss 19.60690


 21%|██        | 413/2000 [07:01<22:23,  1.18it/s]

Training_loss 19.92473


 21%|██        | 414/2000 [07:01<22:41,  1.16it/s]

Training_loss 19.65027


 21%|██        | 415/2000 [07:03<24:42,  1.07it/s]

Training_loss 19.45422


 21%|██        | 416/2000 [07:03<24:25,  1.08it/s]

Training_loss 19.24208


 21%|██        | 417/2000 [07:04<24:35,  1.07it/s]

Training_loss 19.47092


 21%|██        | 418/2000 [07:05<24:09,  1.09it/s]

Training_loss 19.74318


 21%|██        | 419/2000 [07:06<26:13,  1.01it/s]

Training_loss 19.75564


 21%|██        | 420/2000 [07:08<26:24,  1.00s/it]

Training_loss 19.91204


 21%|██        | 421/2000 [07:09<27:31,  1.05s/it]

Training_loss 19.83065


 21%|██        | 422/2000 [07:09<25:14,  1.04it/s]

Training_loss 19.77567


 21%|██        | 423/2000 [07:10<25:42,  1.02it/s]

Training_loss 20.11931


 21%|██        | 424/2000 [07:11<24:26,  1.07it/s]

Training_loss 20.08510


 21%|██▏       | 425/2000 [07:12<22:44,  1.15it/s]

Training_loss 20.01805


 21%|██▏       | 426/2000 [07:13<24:41,  1.06it/s]

Training_loss 19.78401


 21%|██▏       | 427/2000 [07:14<25:15,  1.04it/s]

Training_loss 19.88122


 21%|██▏       | 428/2000 [07:15<24:25,  1.07it/s]

Training_loss 20.08508


 21%|██▏       | 429/2000 [07:16<24:02,  1.09it/s]

Training_loss 20.23194


 22%|██▏       | 430/2000 [07:17<23:04,  1.13it/s]

Training_loss 20.23117


 22%|██▏       | 431/2000 [07:18<23:18,  1.12it/s]

Training_loss 19.81097


 22%|██▏       | 432/2000 [07:18<22:25,  1.17it/s]

Training_loss 20.16743


 22%|██▏       | 433/2000 [07:19<22:14,  1.17it/s]

Training_loss 20.24771


 22%|██▏       | 434/2000 [07:20<21:31,  1.21it/s]

Training_loss 20.27233


 22%|██▏       | 435/2000 [07:21<21:38,  1.21it/s]

Training_loss 20.54530


 22%|██▏       | 436/2000 [07:22<21:15,  1.23it/s]

Training_loss 20.39517


 22%|██▏       | 437/2000 [07:22<20:42,  1.26it/s]

Training_loss 20.21695


 22%|██▏       | 438/2000 [07:23<22:02,  1.18it/s]

Training_loss 20.46122


 22%|██▏       | 439/2000 [07:24<22:18,  1.17it/s]

Training_loss 20.46366


 22%|██▏       | 440/2000 [07:25<21:43,  1.20it/s]

Training_loss 20.35048


 22%|██▏       | 441/2000 [07:26<22:39,  1.15it/s]

Training_loss 20.11337


 22%|██▏       | 442/2000 [07:27<22:23,  1.16it/s]

Training_loss 20.15001


 22%|██▏       | 443/2000 [07:28<22:13,  1.17it/s]

Training_loss 19.93184


 22%|██▏       | 444/2000 [07:29<22:44,  1.14it/s]

Training_loss 19.85463


 22%|██▏       | 445/2000 [07:29<21:51,  1.19it/s]

Training_loss 20.00288


 22%|██▏       | 446/2000 [07:30<21:46,  1.19it/s]

Training_loss 19.77002


 22%|██▏       | 447/2000 [07:31<22:14,  1.16it/s]

Training_loss 19.97926


 22%|██▏       | 448/2000 [07:32<22:12,  1.16it/s]

Training_loss 19.93869


 22%|██▏       | 449/2000 [07:33<21:41,  1.19it/s]

Training_loss 20.19463


 22%|██▎       | 450/2000 [07:34<22:28,  1.15it/s]

Training_loss 20.00962


 23%|██▎       | 451/2000 [07:34<21:55,  1.18it/s]

Training_loss 19.78911


 23%|██▎       | 452/2000 [07:35<22:14,  1.16it/s]

Training_loss 19.86264


 23%|██▎       | 453/2000 [07:36<21:26,  1.20it/s]

Training_loss 19.85024


 23%|██▎       | 454/2000 [07:37<21:07,  1.22it/s]

Training_loss 19.84796


 23%|██▎       | 455/2000 [07:38<21:38,  1.19it/s]

Training_loss 19.83776


 23%|██▎       | 456/2000 [07:39<23:37,  1.09it/s]

Training_loss 19.66984


 23%|██▎       | 457/2000 [07:40<23:31,  1.09it/s]

Training_loss 20.22336


 23%|██▎       | 458/2000 [07:41<23:35,  1.09it/s]

Training_loss 20.07794


 23%|██▎       | 459/2000 [07:42<23:56,  1.07it/s]

Training_loss 20.04716


 23%|██▎       | 460/2000 [07:43<24:48,  1.03it/s]

Training_loss 19.99001


 23%|██▎       | 461/2000 [07:44<24:03,  1.07it/s]

Training_loss 19.71977


 23%|██▎       | 462/2000 [07:44<23:29,  1.09it/s]

Training_loss 19.81873


 23%|██▎       | 463/2000 [07:45<22:06,  1.16it/s]

Training_loss 19.86698


 23%|██▎       | 464/2000 [07:46<21:15,  1.20it/s]

Training_loss 19.77149


 23%|██▎       | 465/2000 [07:47<22:34,  1.13it/s]

Training_loss 19.66590


 23%|██▎       | 466/2000 [07:48<21:45,  1.18it/s]

Training_loss 19.78390


 23%|██▎       | 467/2000 [07:48<21:19,  1.20it/s]

Training_loss 19.61321


 23%|██▎       | 468/2000 [07:49<22:07,  1.15it/s]

Training_loss 19.70659


 23%|██▎       | 469/2000 [07:50<21:38,  1.18it/s]

Training_loss 19.67811


 24%|██▎       | 470/2000 [07:51<20:59,  1.21it/s]

Training_loss 19.68814


 24%|██▎       | 471/2000 [07:52<20:29,  1.24it/s]

Training_loss 20.01318


 24%|██▎       | 472/2000 [07:53<20:14,  1.26it/s]

Training_loss 20.10028


 24%|██▎       | 473/2000 [07:54<22:00,  1.16it/s]

Training_loss 20.28684


 24%|██▎       | 474/2000 [07:55<24:10,  1.05it/s]

Training_loss 20.34407


 24%|██▍       | 475/2000 [07:56<25:34,  1.01s/it]

Training_loss 20.08434


 24%|██▍       | 476/2000 [07:57<24:12,  1.05it/s]

Training_loss 20.32524


 24%|██▍       | 477/2000 [07:58<24:33,  1.03it/s]

Training_loss 20.60201


 24%|██▍       | 478/2000 [07:59<23:46,  1.07it/s]

Training_loss 20.78865


 24%|██▍       | 479/2000 [07:59<22:39,  1.12it/s]

Training_loss 20.53464


 24%|██▍       | 480/2000 [08:00<23:12,  1.09it/s]

Training_loss 20.39107


 24%|██▍       | 481/2000 [08:01<23:49,  1.06it/s]

Training_loss 20.44018


 24%|██▍       | 482/2000 [08:02<23:04,  1.10it/s]

Training_loss 20.06377


 24%|██▍       | 483/2000 [08:03<22:37,  1.12it/s]

Training_loss 20.21255


 24%|██▍       | 484/2000 [08:04<22:29,  1.12it/s]

Training_loss 20.18932


 24%|██▍       | 485/2000 [08:05<22:06,  1.14it/s]

Training_loss 20.08478


 24%|██▍       | 486/2000 [08:06<23:03,  1.09it/s]

Training_loss 20.11142


 24%|██▍       | 487/2000 [08:07<22:11,  1.14it/s]

Training_loss 20.18723


 24%|██▍       | 488/2000 [08:07<21:03,  1.20it/s]

Training_loss 20.05579


 24%|██▍       | 489/2000 [08:08<22:58,  1.10it/s]

Training_loss 20.15753


 24%|██▍       | 490/2000 [08:09<22:51,  1.10it/s]

Training_loss 20.56700


 25%|██▍       | 491/2000 [08:10<23:07,  1.09it/s]

Training_loss 20.75647


 25%|██▍       | 492/2000 [08:11<21:46,  1.15it/s]

Training_loss 20.54120


 25%|██▍       | 493/2000 [08:12<23:31,  1.07it/s]

Training_loss 20.47009


 25%|██▍       | 494/2000 [08:13<23:01,  1.09it/s]

Training_loss 20.26814


 25%|██▍       | 495/2000 [08:14<22:56,  1.09it/s]

Training_loss 20.21552


 25%|██▍       | 496/2000 [08:15<22:32,  1.11it/s]

Training_loss 20.18153


 25%|██▍       | 497/2000 [08:16<22:56,  1.09it/s]

Training_loss 19.86176


 25%|██▍       | 498/2000 [08:16<21:35,  1.16it/s]

Training_loss 19.86799


 25%|██▍       | 499/2000 [08:17<21:35,  1.16it/s]

Training_loss 19.94063


 25%|██▌       | 500/2000 [08:18<21:35,  1.16it/s]

Training_loss 20.11347


 25%|██▌       | 501/2000 [08:19<21:39,  1.15it/s]

Training_loss 20.12111


 25%|██▌       | 502/2000 [08:20<22:05,  1.13it/s]

Training_loss 20.08533


 25%|██▌       | 503/2000 [08:21<21:14,  1.17it/s]

Training_loss 20.49657


 25%|██▌       | 504/2000 [08:22<22:12,  1.12it/s]

Training_loss 20.60809


 25%|██▌       | 505/2000 [08:23<23:02,  1.08it/s]

Training_loss 20.80255


 25%|██▌       | 506/2000 [08:24<24:37,  1.01it/s]

Training_loss 20.77705


 25%|██▌       | 507/2000 [08:25<23:08,  1.08it/s]

Training_loss 20.82935


 25%|██▌       | 508/2000 [08:25<22:13,  1.12it/s]

Training_loss 20.78432


 25%|██▌       | 509/2000 [08:26<23:06,  1.08it/s]

Training_loss 20.60368


 26%|██▌       | 510/2000 [08:28<25:44,  1.04s/it]

Training_loss 20.52085


 26%|██▌       | 511/2000 [08:29<24:33,  1.01it/s]

Training_loss 20.51033


 26%|██▌       | 512/2000 [08:30<24:47,  1.00it/s]

Training_loss 20.49127


 26%|██▌       | 513/2000 [08:30<23:21,  1.06it/s]

Training_loss 20.58530


 26%|██▌       | 514/2000 [08:31<22:05,  1.12it/s]

Training_loss 20.54682


 26%|██▌       | 515/2000 [08:32<21:21,  1.16it/s]

Training_loss 20.38111


 26%|██▌       | 516/2000 [08:33<20:42,  1.19it/s]

Training_loss 20.52481


 26%|██▌       | 517/2000 [08:34<22:47,  1.08it/s]

Training_loss 20.79810


 26%|██▌       | 518/2000 [08:35<23:13,  1.06it/s]

Training_loss 20.89299


 26%|██▌       | 519/2000 [08:36<22:25,  1.10it/s]

Training_loss 20.76365


 26%|██▌       | 520/2000 [08:37<21:49,  1.13it/s]

Training_loss 20.79340


 26%|██▌       | 521/2000 [08:37<21:28,  1.15it/s]

Training_loss 20.32791


 26%|██▌       | 522/2000 [08:38<21:34,  1.14it/s]

Training_loss 20.62046


 26%|██▌       | 523/2000 [08:39<23:39,  1.04it/s]

Training_loss 20.44111


 26%|██▌       | 524/2000 [08:40<23:33,  1.04it/s]

Training_loss 20.36830


 26%|██▋       | 525/2000 [08:41<23:42,  1.04it/s]

Training_loss 20.43660


 26%|██▋       | 526/2000 [08:42<23:49,  1.03it/s]

Training_loss 20.57564


 26%|██▋       | 527/2000 [08:43<23:10,  1.06it/s]

Training_loss 20.75545


 26%|██▋       | 528/2000 [08:44<22:06,  1.11it/s]

Training_loss 20.85327


 26%|██▋       | 529/2000 [08:45<21:57,  1.12it/s]

Training_loss 20.98000


 26%|██▋       | 530/2000 [08:46<23:32,  1.04it/s]

Training_loss 20.90632


 27%|██▋       | 531/2000 [08:47<22:36,  1.08it/s]

Training_loss 21.13136


 27%|██▋       | 532/2000 [08:48<22:45,  1.08it/s]

Training_loss 20.95011


 27%|██▋       | 533/2000 [08:49<22:32,  1.08it/s]

Training_loss 20.73090


 27%|██▋       | 534/2000 [08:50<23:14,  1.05it/s]

Training_loss 20.99915


 27%|██▋       | 535/2000 [08:51<22:22,  1.09it/s]

Training_loss 21.03727


 27%|██▋       | 536/2000 [08:51<21:48,  1.12it/s]

Training_loss 20.94726


 27%|██▋       | 537/2000 [08:52<21:06,  1.15it/s]

Training_loss 21.18640


 27%|██▋       | 538/2000 [08:53<20:30,  1.19it/s]

Training_loss 21.24777


 27%|██▋       | 539/2000 [08:54<21:10,  1.15it/s]

Training_loss 20.90788


 27%|██▋       | 540/2000 [08:55<20:38,  1.18it/s]

Training_loss 20.56273


 27%|██▋       | 541/2000 [08:56<23:24,  1.04it/s]

Training_loss 20.65446


 27%|██▋       | 542/2000 [08:57<22:52,  1.06it/s]

Training_loss 20.58423


 27%|██▋       | 543/2000 [08:58<21:32,  1.13it/s]

Training_loss 20.80112


 27%|██▋       | 544/2000 [08:58<20:38,  1.18it/s]

Training_loss 20.86930


 27%|██▋       | 545/2000 [08:59<21:53,  1.11it/s]

Training_loss 20.83668


 27%|██▋       | 546/2000 [09:00<21:04,  1.15it/s]

Training_loss 20.80158


 27%|██▋       | 547/2000 [09:01<21:16,  1.14it/s]

Training_loss 21.04638


 27%|██▋       | 548/2000 [09:02<22:54,  1.06it/s]

Training_loss 21.03635


 27%|██▋       | 549/2000 [09:03<21:14,  1.14it/s]

Training_loss 21.11621


 28%|██▊       | 550/2000 [09:04<20:35,  1.17it/s]

Training_loss 21.12091


 28%|██▊       | 551/2000 [09:05<20:37,  1.17it/s]

Training_loss 20.76695


 28%|██▊       | 552/2000 [09:05<20:04,  1.20it/s]

Training_loss 21.07999


 28%|██▊       | 553/2000 [09:06<19:41,  1.22it/s]

Training_loss 21.24272


 28%|██▊       | 554/2000 [09:07<20:29,  1.18it/s]

Training_loss 21.16449


 28%|██▊       | 555/2000 [09:08<21:02,  1.14it/s]

Training_loss 21.15853


 28%|██▊       | 556/2000 [09:09<20:11,  1.19it/s]

Training_loss 20.64405


 28%|██▊       | 557/2000 [09:10<21:22,  1.12it/s]

Training_loss 20.59977


 28%|██▊       | 558/2000 [09:11<22:32,  1.07it/s]

Training_loss 20.42678


 28%|██▊       | 559/2000 [09:12<22:13,  1.08it/s]

Training_loss 20.61317


 28%|██▊       | 560/2000 [09:13<22:44,  1.06it/s]

Training_loss 20.57043


 28%|██▊       | 561/2000 [09:13<21:22,  1.12it/s]

Training_loss 20.41334


 28%|██▊       | 562/2000 [09:14<22:08,  1.08it/s]

Training_loss 20.25369


 28%|██▊       | 563/2000 [09:15<22:14,  1.08it/s]

Training_loss 20.26162


 28%|██▊       | 564/2000 [09:16<21:43,  1.10it/s]

Training_loss 20.22110


 28%|██▊       | 565/2000 [09:17<20:55,  1.14it/s]

Training_loss 20.32035


 28%|██▊       | 566/2000 [09:18<20:33,  1.16it/s]

Training_loss 19.91585


 28%|██▊       | 567/2000 [09:19<21:32,  1.11it/s]

Training_loss 19.80860


 28%|██▊       | 568/2000 [09:20<21:54,  1.09it/s]

Training_loss 19.63799


 28%|██▊       | 569/2000 [09:21<20:47,  1.15it/s]

Training_loss 19.58844


 28%|██▊       | 570/2000 [09:22<22:50,  1.04it/s]

Training_loss 19.92428


 29%|██▊       | 571/2000 [09:23<22:15,  1.07it/s]

Training_loss 19.79938


 29%|██▊       | 572/2000 [09:23<21:01,  1.13it/s]

Training_loss 19.75519


 29%|██▊       | 573/2000 [09:24<20:50,  1.14it/s]

Training_loss 19.92977


 29%|██▊       | 574/2000 [09:25<20:26,  1.16it/s]

Training_loss 19.78358


 29%|██▉       | 575/2000 [09:26<22:17,  1.07it/s]

Training_loss 20.09251


 29%|██▉       | 576/2000 [09:27<21:05,  1.13it/s]

Training_loss 20.02625


 29%|██▉       | 577/2000 [09:28<23:35,  1.01it/s]

Training_loss 20.01720


 29%|██▉       | 578/2000 [09:29<24:40,  1.04s/it]

Training_loss 19.91643


 29%|██▉       | 579/2000 [09:30<24:22,  1.03s/it]

Training_loss 20.30725


 29%|██▉       | 580/2000 [09:31<22:35,  1.05it/s]

Training_loss 20.44121


 29%|██▉       | 581/2000 [09:32<21:44,  1.09it/s]

Training_loss 20.72764


 29%|██▉       | 582/2000 [09:33<22:46,  1.04it/s]

Training_loss 20.56471


 29%|██▉       | 583/2000 [09:34<21:43,  1.09it/s]

Training_loss 20.62685


 29%|██▉       | 584/2000 [09:35<21:07,  1.12it/s]

Training_loss 20.56766


 29%|██▉       | 585/2000 [09:36<20:30,  1.15it/s]

Training_loss 20.66387


 29%|██▉       | 586/2000 [09:36<21:13,  1.11it/s]

Training_loss 20.84410


 29%|██▉       | 587/2000 [09:37<21:33,  1.09it/s]

Training_loss 20.81886


 29%|██▉       | 588/2000 [09:38<21:40,  1.09it/s]

Training_loss 20.82960


 29%|██▉       | 589/2000 [09:39<21:24,  1.10it/s]

Training_loss 20.89786


 30%|██▉       | 590/2000 [09:40<20:17,  1.16it/s]

Training_loss 20.76389


 30%|██▉       | 591/2000 [09:41<19:45,  1.19it/s]

Training_loss 20.61988


 30%|██▉       | 592/2000 [09:42<21:08,  1.11it/s]

Training_loss 20.50601


 30%|██▉       | 593/2000 [09:43<21:28,  1.09it/s]

Training_loss 20.73306


 30%|██▉       | 594/2000 [09:44<21:22,  1.10it/s]

Training_loss 20.67305


 30%|██▉       | 595/2000 [09:44<20:28,  1.14it/s]

Training_loss 20.88350


 30%|██▉       | 596/2000 [09:45<20:21,  1.15it/s]

Training_loss 20.74842


 30%|██▉       | 597/2000 [09:46<20:38,  1.13it/s]

Training_loss 20.87652


 30%|██▉       | 598/2000 [09:47<21:06,  1.11it/s]

Training_loss 21.49323


 30%|██▉       | 599/2000 [09:48<23:28,  1.01s/it]

Training_loss 21.35489


 30%|███       | 600/2000 [09:49<22:31,  1.04it/s]

Training_loss 20.86908


 30%|███       | 601/2000 [09:50<21:21,  1.09it/s]

Training_loss 20.80573


 30%|███       | 602/2000 [09:51<22:17,  1.05it/s]

Training_loss 21.02522


 30%|███       | 603/2000 [09:52<20:44,  1.12it/s]

Training_loss 21.17496


 30%|███       | 604/2000 [09:53<20:33,  1.13it/s]

Training_loss 21.07174


 30%|███       | 605/2000 [09:54<20:23,  1.14it/s]

Training_loss 21.25382


 30%|███       | 606/2000 [09:54<19:57,  1.16it/s]

Training_loss 21.02408


 30%|███       | 607/2000 [09:55<20:29,  1.13it/s]

Training_loss 21.11170


 30%|███       | 608/2000 [09:56<20:29,  1.13it/s]

Training_loss 21.33818


 30%|███       | 609/2000 [09:57<20:25,  1.14it/s]

Training_loss 21.20123


 30%|███       | 610/2000 [09:58<19:45,  1.17it/s]

Training_loss 21.25402


 31%|███       | 611/2000 [09:59<19:40,  1.18it/s]

Training_loss 20.86433


 31%|███       | 612/2000 [10:00<19:25,  1.19it/s]

Training_loss 20.87902


 31%|███       | 613/2000 [10:01<20:31,  1.13it/s]

Training_loss 21.13615


 31%|███       | 614/2000 [10:02<20:46,  1.11it/s]

Training_loss 21.18837


 31%|███       | 615/2000 [10:02<20:29,  1.13it/s]

Training_loss 21.04267


 31%|███       | 616/2000 [10:03<20:03,  1.15it/s]

Training_loss 21.51303


 31%|███       | 617/2000 [10:04<19:49,  1.16it/s]

Training_loss 21.44927


 31%|███       | 618/2000 [10:05<19:06,  1.21it/s]

Training_loss 21.19890


 31%|███       | 619/2000 [10:06<19:52,  1.16it/s]

Training_loss 21.08452


 31%|███       | 620/2000 [10:07<23:03,  1.00s/it]

Training_loss 21.19902


 31%|███       | 621/2000 [10:08<22:56,  1.00it/s]

Training_loss 21.13322


 31%|███       | 622/2000 [10:09<22:14,  1.03it/s]

Training_loss 21.45689


 31%|███       | 623/2000 [10:10<21:00,  1.09it/s]

Training_loss 21.33227


 31%|███       | 624/2000 [10:11<21:59,  1.04it/s]

Training_loss 21.58455


 31%|███▏      | 625/2000 [10:12<21:10,  1.08it/s]

Training_loss 21.00099


 31%|███▏      | 626/2000 [10:12<20:34,  1.11it/s]

Training_loss 21.20463


 31%|███▏      | 627/2000 [10:13<21:13,  1.08it/s]

Training_loss 21.17105


 31%|███▏      | 628/2000 [10:14<20:40,  1.11it/s]

Training_loss 21.37157


 31%|███▏      | 629/2000 [10:15<20:37,  1.11it/s]

Training_loss 21.41981


 32%|███▏      | 630/2000 [10:16<19:55,  1.15it/s]

Training_loss 21.35324


 32%|███▏      | 631/2000 [10:17<19:37,  1.16it/s]

Training_loss 20.94398


 32%|███▏      | 632/2000 [10:18<19:13,  1.19it/s]

Training_loss 21.14766


 32%|███▏      | 633/2000 [10:19<19:18,  1.18it/s]

Training_loss 21.13471


 32%|███▏      | 634/2000 [10:19<18:44,  1.22it/s]

Training_loss 21.07595


 32%|███▏      | 635/2000 [10:20<18:36,  1.22it/s]

Training_loss 21.19161


 32%|███▏      | 636/2000 [10:21<19:04,  1.19it/s]

Training_loss 21.26020


 32%|███▏      | 637/2000 [10:22<19:46,  1.15it/s]

Training_loss 21.24133


 32%|███▏      | 638/2000 [10:23<19:18,  1.18it/s]

Training_loss 21.19622


 32%|███▏      | 639/2000 [10:24<19:28,  1.16it/s]

Training_loss 21.16195


 32%|███▏      | 640/2000 [10:24<19:33,  1.16it/s]

Training_loss 21.13872


 32%|███▏      | 641/2000 [10:25<19:28,  1.16it/s]

Training_loss 21.25133


 32%|███▏      | 642/2000 [10:26<18:56,  1.20it/s]

Training_loss 21.31163


 32%|███▏      | 643/2000 [10:27<19:48,  1.14it/s]

Training_loss 21.33069


 32%|███▏      | 644/2000 [10:28<19:43,  1.15it/s]

Training_loss 21.29833


 32%|███▏      | 645/2000 [10:29<20:54,  1.08it/s]

Training_loss 21.29864


 32%|███▏      | 646/2000 [10:30<21:39,  1.04it/s]

Training_loss 21.15884


 32%|███▏      | 647/2000 [10:31<20:40,  1.09it/s]

Training_loss 21.05964


 32%|███▏      | 648/2000 [10:32<20:33,  1.10it/s]

Training_loss 21.06366


 32%|███▏      | 649/2000 [10:33<21:29,  1.05it/s]

Training_loss 21.08139


 32%|███▎      | 650/2000 [10:34<22:14,  1.01it/s]

Training_loss 21.10455


 33%|███▎      | 651/2000 [10:35<20:58,  1.07it/s]

Training_loss 21.34342


 33%|███▎      | 652/2000 [10:36<20:38,  1.09it/s]

Training_loss 21.54134


 33%|███▎      | 653/2000 [10:36<19:50,  1.13it/s]

Training_loss 21.49999


 33%|███▎      | 654/2000 [10:37<20:47,  1.08it/s]

Training_loss 21.62295


 33%|███▎      | 655/2000 [10:38<20:43,  1.08it/s]

Training_loss 21.83325


 33%|███▎      | 656/2000 [10:39<21:14,  1.05it/s]

Training_loss 22.05234


 33%|███▎      | 657/2000 [10:40<19:50,  1.13it/s]

Training_loss 21.84634


 33%|███▎      | 658/2000 [10:41<20:55,  1.07it/s]

Training_loss 22.06981


 33%|███▎      | 659/2000 [10:42<21:35,  1.04it/s]

Training_loss 21.91356


 33%|███▎      | 660/2000 [10:43<21:02,  1.06it/s]

Training_loss 21.81722


 33%|███▎      | 661/2000 [10:44<19:45,  1.13it/s]

Training_loss 21.71333


 33%|███▎      | 662/2000 [10:45<19:18,  1.15it/s]

Training_loss 22.27905


 33%|███▎      | 663/2000 [10:46<19:53,  1.12it/s]

Training_loss 22.08521


 33%|███▎      | 664/2000 [10:46<18:46,  1.19it/s]

Training_loss 22.06031


 33%|███▎      | 665/2000 [10:47<19:42,  1.13it/s]

Training_loss 21.78774


 33%|███▎      | 666/2000 [10:48<18:55,  1.18it/s]

Training_loss 21.85885


 33%|███▎      | 667/2000 [10:49<18:13,  1.22it/s]

Training_loss 21.90348


 33%|███▎      | 668/2000 [10:50<17:41,  1.26it/s]

Training_loss 22.07747


 33%|███▎      | 669/2000 [10:50<17:26,  1.27it/s]

Training_loss 22.14125


 34%|███▎      | 670/2000 [10:51<18:10,  1.22it/s]

Training_loss 22.08703


 34%|███▎      | 671/2000 [10:52<18:18,  1.21it/s]

Training_loss 22.55722


 34%|███▎      | 672/2000 [10:53<18:31,  1.19it/s]

Training_loss 22.49927


 34%|███▎      | 673/2000 [10:54<19:42,  1.12it/s]

Training_loss 22.20046


 34%|███▎      | 674/2000 [10:55<21:40,  1.02it/s]

Training_loss 21.77910


 34%|███▍      | 675/2000 [10:56<21:34,  1.02it/s]

Training_loss 21.73276


 34%|███▍      | 676/2000 [10:57<20:58,  1.05it/s]

Training_loss 21.89137


 34%|███▍      | 677/2000 [10:58<20:19,  1.08it/s]

Training_loss 21.95026


 34%|███▍      | 678/2000 [10:59<20:11,  1.09it/s]

Training_loss 21.78019


 34%|███▍      | 679/2000 [11:00<20:02,  1.10it/s]

Training_loss 21.24620


 34%|███▍      | 680/2000 [11:01<20:12,  1.09it/s]

Training_loss 21.39948


 34%|███▍      | 681/2000 [11:01<20:15,  1.09it/s]

Training_loss 22.02702


 34%|███▍      | 682/2000 [11:02<20:24,  1.08it/s]

Training_loss 21.67856


 34%|███▍      | 683/2000 [11:03<20:54,  1.05it/s]

Training_loss 21.72935


 34%|███▍      | 684/2000 [11:04<20:12,  1.09it/s]

Training_loss 21.70866


 34%|███▍      | 685/2000 [11:05<19:46,  1.11it/s]

Training_loss 21.63270


 34%|███▍      | 686/2000 [11:06<19:54,  1.10it/s]

Training_loss 21.68673


 34%|███▍      | 687/2000 [11:07<20:32,  1.07it/s]

Training_loss 21.89795


 34%|███▍      | 688/2000 [11:08<19:40,  1.11it/s]

Training_loss 22.08762


 34%|███▍      | 689/2000 [11:09<18:59,  1.15it/s]

Training_loss 21.93892


 34%|███▍      | 690/2000 [11:10<20:59,  1.04it/s]

Training_loss 21.90314


 35%|███▍      | 691/2000 [11:11<21:45,  1.00it/s]

Training_loss 22.12596


 35%|███▍      | 692/2000 [11:12<20:53,  1.04it/s]

Training_loss 22.10056


 35%|███▍      | 693/2000 [11:13<19:46,  1.10it/s]

Training_loss 22.01104


 35%|███▍      | 694/2000 [11:14<21:45,  1.00it/s]

Training_loss 22.20125


 35%|███▍      | 695/2000 [11:15<20:45,  1.05it/s]

Training_loss 22.31894


 35%|███▍      | 696/2000 [11:16<20:21,  1.07it/s]

Training_loss 21.77047


 35%|███▍      | 697/2000 [11:16<19:59,  1.09it/s]

Training_loss 21.96752


 35%|███▍      | 698/2000 [11:17<20:21,  1.07it/s]

Training_loss 21.70385


 35%|███▍      | 699/2000 [11:18<20:19,  1.07it/s]

Training_loss 21.73017


 35%|███▌      | 700/2000 [11:19<19:07,  1.13it/s]

Training_loss 21.59855


 35%|███▌      | 701/2000 [11:20<18:29,  1.17it/s]

Training_loss 21.49032


 35%|███▌      | 702/2000 [11:21<17:57,  1.20it/s]

Training_loss 21.50158


 35%|███▌      | 703/2000 [11:21<17:31,  1.23it/s]

Training_loss 21.15700


 35%|███▌      | 704/2000 [11:22<19:03,  1.13it/s]

Training_loss 21.12533


 35%|███▌      | 705/2000 [11:23<18:41,  1.15it/s]

Training_loss 21.36391


 35%|███▌      | 706/2000 [11:24<19:13,  1.12it/s]

Training_loss 21.69648


 35%|███▌      | 707/2000 [11:25<18:16,  1.18it/s]

Training_loss 21.69917


 35%|███▌      | 708/2000 [11:26<18:35,  1.16it/s]

Training_loss 21.42523


 35%|███▌      | 709/2000 [11:27<20:16,  1.06it/s]

Training_loss 21.44760


 36%|███▌      | 710/2000 [11:28<21:30,  1.00s/it]

Training_loss 21.53056


 36%|███▌      | 711/2000 [11:29<20:25,  1.05it/s]

Training_loss 21.45273


 36%|███▌      | 712/2000 [11:30<19:19,  1.11it/s]

Training_loss 21.31635


 36%|███▌      | 713/2000 [11:31<18:54,  1.13it/s]

Training_loss 21.61596


 36%|███▌      | 714/2000 [11:32<20:38,  1.04it/s]

Training_loss 21.64790


 36%|███▌      | 715/2000 [11:33<20:00,  1.07it/s]

Training_loss 21.47709


 36%|███▌      | 716/2000 [11:34<21:24,  1.00s/it]

Training_loss 21.22451


 36%|███▌      | 717/2000 [11:35<20:51,  1.02it/s]

Training_loss 21.25520


 36%|███▌      | 718/2000 [11:36<20:36,  1.04it/s]

Training_loss 21.26912


 36%|███▌      | 719/2000 [11:37<20:36,  1.04it/s]

Training_loss 21.14294


 36%|███▌      | 720/2000 [11:38<20:25,  1.04it/s]

Training_loss 21.06013


 36%|███▌      | 721/2000 [11:39<20:34,  1.04it/s]

Training_loss 21.03078


 36%|███▌      | 722/2000 [11:40<20:38,  1.03it/s]

Training_loss 20.79838


 36%|███▌      | 723/2000 [11:41<21:29,  1.01s/it]

Training_loss 20.75094


 36%|███▌      | 724/2000 [11:41<20:16,  1.05it/s]

Training_loss 20.93676


 36%|███▋      | 725/2000 [11:42<19:17,  1.10it/s]

Training_loss 21.34904


 36%|███▋      | 726/2000 [11:43<18:42,  1.13it/s]

Training_loss 21.44954


 36%|███▋      | 727/2000 [11:44<19:00,  1.12it/s]

Training_loss 21.67675


 36%|███▋      | 728/2000 [11:45<21:03,  1.01it/s]

Training_loss 21.85129


 36%|███▋      | 729/2000 [11:46<20:23,  1.04it/s]

Training_loss 21.75655


 36%|███▋      | 730/2000 [11:47<18:53,  1.12it/s]

Training_loss 21.70045


 37%|███▋      | 731/2000 [11:48<19:18,  1.10it/s]

Training_loss 21.88777


 37%|███▋      | 732/2000 [11:49<18:40,  1.13it/s]

Training_loss 22.00182


 37%|███▋      | 733/2000 [11:49<18:28,  1.14it/s]

Training_loss 21.98675


 37%|███▋      | 734/2000 [11:51<19:28,  1.08it/s]

Training_loss 21.92761


 37%|███▋      | 735/2000 [11:51<19:31,  1.08it/s]

Training_loss 22.19966


 37%|███▋      | 736/2000 [11:52<19:06,  1.10it/s]

Training_loss 22.06459


 37%|███▋      | 737/2000 [11:53<18:06,  1.16it/s]

Training_loss 21.96961


 37%|███▋      | 738/2000 [11:54<19:22,  1.09it/s]

Training_loss 21.96702


 37%|███▋      | 739/2000 [11:55<19:10,  1.10it/s]

Training_loss 22.14565


 37%|███▋      | 740/2000 [11:56<19:30,  1.08it/s]

Training_loss 22.06682


 37%|███▋      | 741/2000 [11:57<20:34,  1.02it/s]

Training_loss 21.98029


 37%|███▋      | 742/2000 [11:58<19:48,  1.06it/s]

Training_loss 21.99031


 37%|███▋      | 743/2000 [11:59<19:55,  1.05it/s]

Training_loss 21.91010


 37%|███▋      | 744/2000 [12:00<20:03,  1.04it/s]

Training_loss 21.91202


 37%|███▋      | 745/2000 [12:01<19:15,  1.09it/s]

Training_loss 22.00318


 37%|███▋      | 746/2000 [12:01<18:03,  1.16it/s]

Training_loss 22.22998


 37%|███▋      | 747/2000 [12:02<18:06,  1.15it/s]

Training_loss 22.08469


 37%|███▋      | 748/2000 [12:03<18:06,  1.15it/s]

Training_loss 22.21213


 37%|███▋      | 749/2000 [12:04<17:56,  1.16it/s]

Training_loss 22.17157


 38%|███▊      | 750/2000 [12:05<17:32,  1.19it/s]

Training_loss 22.22764


 38%|███▊      | 751/2000 [12:06<18:53,  1.10it/s]

Training_loss 21.97668


 38%|███▊      | 752/2000 [12:07<18:36,  1.12it/s]

Training_loss 22.23444


 38%|███▊      | 753/2000 [12:08<21:06,  1.02s/it]

Training_loss 22.14940


 38%|███▊      | 754/2000 [12:09<20:47,  1.00s/it]

Training_loss 22.38001


 38%|███▊      | 755/2000 [12:10<19:22,  1.07it/s]

Training_loss 22.52227


 38%|███▊      | 756/2000 [12:11<19:29,  1.06it/s]

Training_loss 22.18891


 38%|███▊      | 757/2000 [12:12<18:25,  1.12it/s]

Training_loss 21.94772


 38%|███▊      | 758/2000 [12:13<20:52,  1.01s/it]

Training_loss 21.84631


 38%|███▊      | 759/2000 [12:14<19:37,  1.05it/s]

Training_loss 21.77115


 38%|███▊      | 760/2000 [12:14<18:27,  1.12it/s]

Training_loss 21.89701


 38%|███▊      | 761/2000 [12:15<19:38,  1.05it/s]

Training_loss 21.92631


 38%|███▊      | 762/2000 [12:17<20:46,  1.01s/it]

Training_loss 21.99739


 38%|███▊      | 763/2000 [12:18<20:02,  1.03it/s]

Training_loss 21.99386


 38%|███▊      | 764/2000 [12:18<19:38,  1.05it/s]

Training_loss 21.58735


 38%|███▊      | 765/2000 [12:19<18:41,  1.10it/s]

Training_loss 21.75234


 38%|███▊      | 766/2000 [12:20<18:57,  1.08it/s]

Training_loss 21.78306


 38%|███▊      | 767/2000 [12:21<18:10,  1.13it/s]

Training_loss 21.70281


 38%|███▊      | 768/2000 [12:22<19:11,  1.07it/s]

Training_loss 21.72864


 38%|███▊      | 769/2000 [12:23<19:22,  1.06it/s]

Training_loss 21.72540


 38%|███▊      | 770/2000 [12:24<20:27,  1.00it/s]

Training_loss 21.94975


 39%|███▊      | 771/2000 [12:25<20:13,  1.01it/s]

Training_loss 21.97961


 39%|███▊      | 772/2000 [12:26<19:04,  1.07it/s]

Training_loss 22.18384


 39%|███▊      | 773/2000 [12:27<18:56,  1.08it/s]

Training_loss 22.53038


 39%|███▊      | 774/2000 [12:28<18:20,  1.11it/s]

Training_loss 22.21071


 39%|███▉      | 775/2000 [12:29<18:18,  1.11it/s]

Training_loss 22.46958


 39%|███▉      | 776/2000 [12:29<17:55,  1.14it/s]

Training_loss 22.83779


 39%|███▉      | 777/2000 [12:30<17:34,  1.16it/s]

Training_loss 22.56345


 39%|███▉      | 778/2000 [12:31<20:02,  1.02it/s]

Training_loss 22.50020


 39%|███▉      | 779/2000 [12:32<20:17,  1.00it/s]

Training_loss 22.31161


 39%|███▉      | 780/2000 [12:33<19:54,  1.02it/s]

Training_loss 22.33353


 39%|███▉      | 781/2000 [12:34<18:43,  1.09it/s]

Training_loss 22.34394


 39%|███▉      | 782/2000 [12:35<19:31,  1.04it/s]

Training_loss 22.23354


 39%|███▉      | 783/2000 [12:36<20:06,  1.01it/s]

Training_loss 22.18372


 39%|███▉      | 784/2000 [12:37<18:50,  1.08it/s]

Training_loss 22.26437


 39%|███▉      | 785/2000 [12:38<19:00,  1.07it/s]

Training_loss 22.17594


 39%|███▉      | 786/2000 [12:39<19:06,  1.06it/s]

Training_loss 22.14961


 39%|███▉      | 787/2000 [12:40<19:12,  1.05it/s]

Training_loss 22.18486


 39%|███▉      | 788/2000 [12:41<18:25,  1.10it/s]

Training_loss 22.16858


 39%|███▉      | 789/2000 [12:42<17:58,  1.12it/s]

Training_loss 22.42613


 40%|███▉      | 790/2000 [12:43<18:03,  1.12it/s]

Training_loss 22.31184


 40%|███▉      | 791/2000 [12:44<18:22,  1.10it/s]

Training_loss 22.38590


 40%|███▉      | 792/2000 [12:45<19:39,  1.02it/s]

Training_loss 22.38499


 40%|███▉      | 793/2000 [12:45<18:43,  1.07it/s]

Training_loss 22.47549


 40%|███▉      | 794/2000 [12:46<17:35,  1.14it/s]

Training_loss 22.52659


 40%|███▉      | 795/2000 [12:47<17:33,  1.14it/s]

Training_loss 22.39091


 40%|███▉      | 796/2000 [12:48<19:25,  1.03it/s]

Training_loss 22.44252


 40%|███▉      | 797/2000 [12:49<18:35,  1.08it/s]

Training_loss 22.60392


 40%|███▉      | 798/2000 [12:51<26:16,  1.31s/it]

Training_loss 22.48683


 40%|███▉      | 799/2000 [12:53<26:06,  1.30s/it]

Training_loss 22.33617


 40%|████      | 800/2000 [12:54<25:50,  1.29s/it]

Training_loss 22.48572


 40%|████      | 801/2000 [12:55<25:12,  1.26s/it]

Training_loss 22.55735


 40%|████      | 802/2000 [12:57<29:58,  1.50s/it]

Training_loss 22.72827


 40%|████      | 803/2000 [12:59<30:18,  1.52s/it]

Training_loss 22.86074


 40%|████      | 804/2000 [13:00<28:17,  1.42s/it]

Training_loss 23.07478


 40%|████      | 805/2000 [13:01<25:55,  1.30s/it]

Training_loss 23.12165


 40%|████      | 806/2000 [13:02<24:17,  1.22s/it]

Training_loss 23.09818


 40%|████      | 807/2000 [13:03<23:06,  1.16s/it]

Training_loss 22.83791


 40%|████      | 808/2000 [13:05<27:39,  1.39s/it]

Training_loss 22.75477


 40%|████      | 809/2000 [13:07<31:29,  1.59s/it]

Training_loss 22.80041


 40%|████      | 810/2000 [13:09<32:13,  1.63s/it]

Training_loss 22.74266


 41%|████      | 811/2000 [13:10<32:04,  1.62s/it]

Training_loss 22.78835


 41%|████      | 812/2000 [13:11<28:40,  1.45s/it]

Training_loss 22.52870


 41%|████      | 813/2000 [13:12<25:25,  1.29s/it]

Training_loss 22.61958


 41%|████      | 814/2000 [13:13<24:25,  1.24s/it]

Training_loss 22.63817


 41%|████      | 815/2000 [13:14<22:47,  1.15s/it]

Training_loss 22.69230


 41%|████      | 816/2000 [13:15<22:41,  1.15s/it]

Training_loss 22.68868


 41%|████      | 817/2000 [13:16<21:00,  1.07s/it]

Training_loss 22.56636


 41%|████      | 818/2000 [13:17<20:37,  1.05s/it]

Training_loss 22.48821


 41%|████      | 819/2000 [13:18<20:15,  1.03s/it]

Training_loss 22.37360


 41%|████      | 820/2000 [13:19<19:58,  1.02s/it]

Training_loss 22.02584


 41%|████      | 821/2000 [13:20<19:32,  1.01it/s]

Training_loss 22.14994


 41%|████      | 822/2000 [13:21<18:52,  1.04it/s]

Training_loss 22.11383


 41%|████      | 823/2000 [13:22<18:33,  1.06it/s]

Training_loss 22.04718


 41%|████      | 824/2000 [13:23<18:34,  1.05it/s]

Training_loss 22.13266


 41%|████▏     | 825/2000 [13:24<19:08,  1.02it/s]

Training_loss 22.32581


 41%|████▏     | 826/2000 [13:25<18:20,  1.07it/s]

Training_loss 22.00263


 41%|████▏     | 827/2000 [13:26<18:45,  1.04it/s]

Training_loss 21.89316


 41%|████▏     | 828/2000 [13:27<18:52,  1.03it/s]

Training_loss 21.93401


 41%|████▏     | 829/2000 [13:28<18:56,  1.03it/s]

Training_loss 21.93488


 42%|████▏     | 830/2000 [13:29<18:51,  1.03it/s]

Training_loss 21.78776


 42%|████▏     | 831/2000 [13:30<17:38,  1.10it/s]

Training_loss 22.15892


 42%|████▏     | 832/2000 [13:30<17:17,  1.13it/s]

Training_loss 22.08898


 42%|████▏     | 833/2000 [13:31<17:51,  1.09it/s]

Training_loss 21.80109


 42%|████▏     | 834/2000 [13:32<17:20,  1.12it/s]

Training_loss 21.63482


 42%|████▏     | 835/2000 [13:33<18:11,  1.07it/s]

Training_loss 21.60053


 42%|████▏     | 836/2000 [13:34<17:44,  1.09it/s]

Training_loss 21.71296


 42%|████▏     | 837/2000 [13:35<17:33,  1.10it/s]

Training_loss 21.93226


 42%|████▏     | 838/2000 [13:36<17:22,  1.11it/s]

Training_loss 22.10452


 42%|████▏     | 839/2000 [13:37<18:57,  1.02it/s]

Training_loss 22.32028


 42%|████▏     | 840/2000 [13:38<18:17,  1.06it/s]

Training_loss 22.34507


 42%|████▏     | 841/2000 [13:39<18:31,  1.04it/s]

Training_loss 22.32629


 42%|████▏     | 842/2000 [13:40<17:44,  1.09it/s]

Training_loss 22.52355


 42%|████▏     | 843/2000 [13:41<18:02,  1.07it/s]

Training_loss 22.35260


 42%|████▏     | 844/2000 [13:42<17:55,  1.08it/s]

Training_loss 22.18540


 42%|████▏     | 845/2000 [13:43<18:12,  1.06it/s]

Training_loss 22.32404


 42%|████▏     | 846/2000 [13:44<20:42,  1.08s/it]

Training_loss 22.11669


 42%|████▏     | 847/2000 [13:45<19:13,  1.00s/it]

Training_loss 22.21972


 42%|████▏     | 848/2000 [13:46<19:45,  1.03s/it]

Training_loss 22.23541


 42%|████▏     | 849/2000 [13:47<18:49,  1.02it/s]

Training_loss 22.18194


 42%|████▎     | 850/2000 [13:48<18:24,  1.04it/s]

Training_loss 22.55162


 43%|████▎     | 851/2000 [13:49<18:05,  1.06it/s]

Training_loss 22.80767


 43%|████▎     | 852/2000 [13:50<17:57,  1.07it/s]

Training_loss 22.64527


 43%|████▎     | 853/2000 [13:50<17:26,  1.10it/s]

Training_loss 22.46361


 43%|████▎     | 854/2000 [13:51<18:27,  1.03it/s]

Training_loss 22.17205


 43%|████▎     | 855/2000 [13:52<18:31,  1.03it/s]

Training_loss 22.17827


 43%|████▎     | 856/2000 [13:54<19:16,  1.01s/it]

Training_loss 22.40512


 43%|████▎     | 857/2000 [13:54<18:50,  1.01it/s]

Training_loss 22.41117


 43%|████▎     | 858/2000 [13:55<17:44,  1.07it/s]

Training_loss 22.58250


 43%|████▎     | 859/2000 [13:56<16:59,  1.12it/s]

Training_loss 22.59707


 43%|████▎     | 860/2000 [13:57<16:28,  1.15it/s]

Training_loss 22.65112


 43%|████▎     | 861/2000 [13:58<18:17,  1.04it/s]

Training_loss 22.69890


 43%|████▎     | 862/2000 [13:59<18:05,  1.05it/s]

Training_loss 22.77283


 43%|████▎     | 863/2000 [14:00<18:09,  1.04it/s]

Training_loss 22.71596


 43%|████▎     | 864/2000 [14:01<17:13,  1.10it/s]

Training_loss 22.79325


 43%|████▎     | 865/2000 [14:02<17:35,  1.08it/s]

Training_loss 23.06541


 43%|████▎     | 866/2000 [14:03<17:16,  1.09it/s]

Training_loss 22.72512


 43%|████▎     | 867/2000 [14:03<17:03,  1.11it/s]

Training_loss 22.63941


 43%|████▎     | 868/2000 [14:04<17:22,  1.09it/s]

Training_loss 22.91172


 43%|████▎     | 869/2000 [14:06<18:49,  1.00it/s]

Training_loss 23.02168


 44%|████▎     | 870/2000 [14:07<18:24,  1.02it/s]

Training_loss 22.63187


 44%|████▎     | 871/2000 [14:08<18:16,  1.03it/s]

Training_loss 22.57309


 44%|████▎     | 872/2000 [14:09<20:34,  1.09s/it]

Training_loss 22.57632


 44%|████▎     | 873/2000 [14:10<19:22,  1.03s/it]

Training_loss 22.69907


 44%|████▎     | 874/2000 [14:11<18:55,  1.01s/it]

Training_loss 22.92702


 44%|████▍     | 875/2000 [14:12<18:57,  1.01s/it]

Training_loss 22.81708


 44%|████▍     | 876/2000 [14:13<17:49,  1.05it/s]

Training_loss 22.95665


 44%|████▍     | 877/2000 [14:14<18:08,  1.03it/s]

Training_loss 22.77089


 44%|████▍     | 878/2000 [14:15<18:50,  1.01s/it]

Training_loss 22.67361


 44%|████▍     | 879/2000 [14:15<17:35,  1.06it/s]

Training_loss 22.42664


 44%|████▍     | 880/2000 [14:16<17:42,  1.05it/s]

Training_loss 22.27164


 44%|████▍     | 881/2000 [14:17<18:04,  1.03it/s]

Training_loss 22.45626


 44%|████▍     | 882/2000 [14:18<17:21,  1.07it/s]

Training_loss 22.43520


 44%|████▍     | 883/2000 [14:19<17:39,  1.05it/s]

Training_loss 22.30976


 44%|████▍     | 884/2000 [14:21<19:27,  1.05s/it]

Training_loss 22.84396


 44%|████▍     | 885/2000 [14:21<18:26,  1.01it/s]

Training_loss 22.82992


 44%|████▍     | 886/2000 [14:23<19:34,  1.05s/it]

Training_loss 22.82415


 44%|████▍     | 887/2000 [14:24<18:43,  1.01s/it]

Training_loss 22.74093


 44%|████▍     | 888/2000 [14:24<18:18,  1.01it/s]

Training_loss 22.57421


 44%|████▍     | 889/2000 [14:25<18:00,  1.03it/s]

Training_loss 22.76820


 44%|████▍     | 890/2000 [14:26<18:30,  1.00s/it]

Training_loss 22.86946


 45%|████▍     | 891/2000 [14:27<17:29,  1.06it/s]

Training_loss 22.92231


 45%|████▍     | 892/2000 [14:28<17:17,  1.07it/s]

Training_loss 22.96578


 45%|████▍     | 893/2000 [14:29<16:16,  1.13it/s]

Training_loss 23.00811


 45%|████▍     | 894/2000 [14:30<16:21,  1.13it/s]

Training_loss 22.92484


 45%|████▍     | 895/2000 [14:31<16:03,  1.15it/s]

Training_loss 22.78968


 45%|████▍     | 896/2000 [14:32<16:28,  1.12it/s]

Training_loss 22.68148


 45%|████▍     | 897/2000 [14:32<15:42,  1.17it/s]

Training_loss 22.62942


 45%|████▍     | 898/2000 [14:33<15:46,  1.16it/s]

Training_loss 22.68586


 45%|████▍     | 899/2000 [14:34<16:08,  1.14it/s]

Training_loss 22.71574


 45%|████▌     | 900/2000 [14:35<15:14,  1.20it/s]

Training_loss 22.59444


 45%|████▌     | 901/2000 [14:36<15:43,  1.17it/s]

Training_loss 22.55261


 45%|████▌     | 902/2000 [14:37<15:11,  1.20it/s]

Training_loss 22.71982


 45%|████▌     | 903/2000 [14:38<17:51,  1.02it/s]

Training_loss 23.08861


 45%|████▌     | 904/2000 [14:39<17:48,  1.03it/s]

Training_loss 23.11330


 45%|████▌     | 905/2000 [14:40<16:54,  1.08it/s]

Training_loss 22.84850


 45%|████▌     | 906/2000 [14:41<17:12,  1.06it/s]

Training_loss 22.82345


 45%|████▌     | 907/2000 [14:42<17:35,  1.04it/s]

Training_loss 22.87155


 45%|████▌     | 908/2000 [14:43<17:44,  1.03it/s]

Training_loss 23.06251


 45%|████▌     | 909/2000 [14:43<16:37,  1.09it/s]

Training_loss 22.68357


 46%|████▌     | 910/2000 [14:44<17:09,  1.06it/s]

Training_loss 22.58348


 46%|████▌     | 911/2000 [14:45<17:07,  1.06it/s]

Training_loss 22.58966


 46%|████▌     | 912/2000 [14:46<16:23,  1.11it/s]

Training_loss 22.51320


 46%|████▌     | 913/2000 [14:47<15:56,  1.14it/s]

Training_loss 22.66007


 46%|████▌     | 914/2000 [14:48<15:44,  1.15it/s]

Training_loss 22.83651


 46%|████▌     | 915/2000 [14:49<15:52,  1.14it/s]

Training_loss 23.09925


 46%|████▌     | 916/2000 [14:50<17:21,  1.04it/s]

Training_loss 23.06369


 46%|████▌     | 917/2000 [14:51<17:39,  1.02it/s]

Training_loss 23.17527


 46%|████▌     | 918/2000 [14:52<17:22,  1.04it/s]

Training_loss 22.90763


 46%|████▌     | 919/2000 [14:53<17:19,  1.04it/s]

Training_loss 22.84360


 46%|████▌     | 920/2000 [14:54<17:16,  1.04it/s]

Training_loss 22.72886


 46%|████▌     | 921/2000 [14:55<17:22,  1.03it/s]

Training_loss 22.73231


 46%|████▌     | 922/2000 [14:56<18:20,  1.02s/it]

Training_loss 22.58219


 46%|████▌     | 923/2000 [14:57<18:07,  1.01s/it]

Training_loss 22.58892


 46%|████▌     | 924/2000 [14:58<19:07,  1.07s/it]

Training_loss 22.68907


 46%|████▋     | 925/2000 [14:59<17:53,  1.00it/s]

Training_loss 22.58541


 46%|████▋     | 926/2000 [15:00<17:05,  1.05it/s]

Training_loss 22.69187


 46%|████▋     | 927/2000 [15:01<16:28,  1.09it/s]

Training_loss 22.64336


 46%|████▋     | 928/2000 [15:02<16:12,  1.10it/s]

Training_loss 22.61093


 46%|████▋     | 929/2000 [15:03<16:32,  1.08it/s]

Training_loss 22.83458


 46%|████▋     | 930/2000 [15:04<17:00,  1.05it/s]

Training_loss 22.62417


 47%|████▋     | 931/2000 [15:04<15:54,  1.12it/s]

Training_loss 22.96221


 47%|████▋     | 932/2000 [15:06<18:00,  1.01s/it]

Training_loss 22.90017


 47%|████▋     | 933/2000 [15:06<17:02,  1.04it/s]

Training_loss 22.86374


 47%|████▋     | 934/2000 [15:08<18:07,  1.02s/it]

Training_loss 22.67815


 47%|████▋     | 935/2000 [15:09<18:43,  1.06s/it]

Training_loss 22.59444


 47%|████▋     | 936/2000 [15:10<18:36,  1.05s/it]

Training_loss 22.71543


 47%|████▋     | 937/2000 [15:11<18:25,  1.04s/it]

Training_loss 22.80956


 47%|████▋     | 938/2000 [15:12<18:34,  1.05s/it]

Training_loss 22.85539


 47%|████▋     | 939/2000 [15:13<18:18,  1.04s/it]

Training_loss 22.86356


 47%|████▋     | 940/2000 [15:14<17:50,  1.01s/it]

Training_loss 23.07069


 47%|████▋     | 941/2000 [15:15<18:37,  1.06s/it]

Training_loss 22.95734


 47%|████▋     | 942/2000 [15:16<17:47,  1.01s/it]

Training_loss 23.05212


 47%|████▋     | 943/2000 [15:17<16:40,  1.06it/s]

Training_loss 23.06162


 47%|████▋     | 944/2000 [15:18<17:51,  1.01s/it]

Training_loss 23.09356


 47%|████▋     | 945/2000 [15:19<16:43,  1.05it/s]

Training_loss 23.08022


 47%|████▋     | 946/2000 [15:20<17:23,  1.01it/s]

Training_loss 22.89892


 47%|████▋     | 947/2000 [15:21<17:49,  1.02s/it]

Training_loss 22.74528


 47%|████▋     | 948/2000 [15:22<16:40,  1.05it/s]

Training_loss 22.90558


 47%|████▋     | 949/2000 [15:23<16:29,  1.06it/s]

Training_loss 22.79426


 48%|████▊     | 950/2000 [15:23<16:19,  1.07it/s]

Training_loss 22.92198


 48%|████▊     | 951/2000 [15:24<16:18,  1.07it/s]

Training_loss 22.95636


 48%|████▊     | 952/2000 [15:25<15:20,  1.14it/s]

Training_loss 22.78821


 48%|████▊     | 953/2000 [15:26<16:28,  1.06it/s]

Training_loss 22.92371


 48%|████▊     | 954/2000 [15:27<17:15,  1.01it/s]

Training_loss 23.11819


 48%|████▊     | 955/2000 [15:28<17:01,  1.02it/s]

Training_loss 22.98404


 48%|████▊     | 956/2000 [15:29<17:31,  1.01s/it]

Training_loss 22.90670


 48%|████▊     | 957/2000 [15:30<17:58,  1.03s/it]

Training_loss 22.96445


 48%|████▊     | 958/2000 [15:31<18:01,  1.04s/it]

Training_loss 22.93443


 48%|████▊     | 959/2000 [15:33<18:14,  1.05s/it]

Training_loss 22.90232


 48%|████▊     | 960/2000 [15:33<16:56,  1.02it/s]

Training_loss 22.96622


 48%|████▊     | 961/2000 [15:34<16:36,  1.04it/s]

Training_loss 22.96082


 48%|████▊     | 962/2000 [15:35<16:33,  1.05it/s]

Training_loss 22.84021


 48%|████▊     | 963/2000 [15:36<15:33,  1.11it/s]

Training_loss 22.78304


 48%|████▊     | 964/2000 [15:37<15:40,  1.10it/s]

Training_loss 22.89624


 48%|████▊     | 965/2000 [15:38<16:29,  1.05it/s]

Training_loss 22.89391


 48%|████▊     | 966/2000 [15:39<15:44,  1.09it/s]

Training_loss 22.97099


 48%|████▊     | 967/2000 [15:40<16:10,  1.06it/s]

Training_loss 23.02388


 48%|████▊     | 968/2000 [15:41<15:33,  1.11it/s]

Training_loss 23.02184


 48%|████▊     | 969/2000 [15:42<17:11,  1.00s/it]

Training_loss 23.00604


 48%|████▊     | 970/2000 [15:43<16:02,  1.07it/s]

Training_loss 23.05069


 49%|████▊     | 971/2000 [15:44<16:06,  1.07it/s]

Training_loss 22.92027


 49%|████▊     | 972/2000 [15:45<16:06,  1.06it/s]

Training_loss 22.92680


 49%|████▊     | 973/2000 [15:45<16:08,  1.06it/s]

Training_loss 22.68420


 49%|████▊     | 974/2000 [15:47<18:08,  1.06s/it]

Training_loss 22.46514


 49%|████▉     | 975/2000 [15:48<16:52,  1.01it/s]

Training_loss 22.43603


 49%|████▉     | 976/2000 [15:49<16:24,  1.04it/s]

Training_loss 22.61189


 49%|████▉     | 977/2000 [15:49<15:55,  1.07it/s]

Training_loss 22.87602


 49%|████▉     | 978/2000 [15:50<16:06,  1.06it/s]

Training_loss 22.77470


 49%|████▉     | 979/2000 [15:51<16:23,  1.04it/s]

Training_loss 22.76949


 49%|████▉     | 980/2000 [15:52<15:49,  1.07it/s]

Training_loss 22.90320


 49%|████▉     | 981/2000 [15:53<17:17,  1.02s/it]

Training_loss 22.91665


 49%|████▉     | 982/2000 [15:54<16:59,  1.00s/it]

Training_loss 22.85292


 49%|████▉     | 983/2000 [15:55<16:13,  1.04it/s]

Training_loss 23.03167


 49%|████▉     | 984/2000 [15:56<17:17,  1.02s/it]

Training_loss 23.02436


 49%|████▉     | 985/2000 [15:57<17:04,  1.01s/it]

Training_loss 23.17551


 49%|████▉     | 986/2000 [15:58<17:03,  1.01s/it]

Training_loss 23.47455


 49%|████▉     | 987/2000 [15:59<16:23,  1.03it/s]

Training_loss 23.37380


 49%|████▉     | 988/2000 [16:00<16:35,  1.02it/s]

Training_loss 23.45274


 49%|████▉     | 989/2000 [16:01<15:20,  1.10it/s]

Training_loss 23.32828


 50%|████▉     | 990/2000 [16:02<17:15,  1.03s/it]

Training_loss 23.20880


 50%|████▉     | 991/2000 [16:03<16:39,  1.01it/s]

Training_loss 23.40494


 50%|████▉     | 992/2000 [16:04<16:03,  1.05it/s]

Training_loss 23.38893


 50%|████▉     | 993/2000 [16:05<15:12,  1.10it/s]

Training_loss 23.51195


 50%|████▉     | 994/2000 [16:06<14:59,  1.12it/s]

Training_loss 23.24952


 50%|████▉     | 995/2000 [16:07<14:50,  1.13it/s]

Training_loss 23.37133


 50%|████▉     | 996/2000 [16:08<15:34,  1.07it/s]

Training_loss 23.20820


 50%|████▉     | 997/2000 [16:08<14:35,  1.15it/s]

Training_loss 23.36627


 50%|████▉     | 998/2000 [16:09<14:58,  1.11it/s]

Training_loss 23.32015


 50%|████▉     | 999/2000 [16:10<14:38,  1.14it/s]

Training_loss 23.15995


 50%|█████     | 1000/2000 [16:11<14:46,  1.13it/s]

Training_loss 23.09881


 50%|█████     | 1001/2000 [16:12<14:47,  1.13it/s]

Training_loss 23.24759


 50%|█████     | 1002/2000 [16:13<15:28,  1.08it/s]

Training_loss 23.14209


 50%|█████     | 1003/2000 [16:14<15:52,  1.05it/s]

Training_loss 23.20948


 50%|█████     | 1004/2000 [16:15<15:28,  1.07it/s]

Training_loss 23.14464


 50%|█████     | 1005/2000 [16:16<15:31,  1.07it/s]

Training_loss 23.13013


 50%|█████     | 1006/2000 [16:17<14:56,  1.11it/s]

Training_loss 23.15291


 50%|█████     | 1007/2000 [16:18<14:36,  1.13it/s]

Training_loss 22.73769


 50%|█████     | 1008/2000 [16:18<14:35,  1.13it/s]

Training_loss 22.63691


 50%|█████     | 1009/2000 [16:19<15:15,  1.08it/s]

Training_loss 22.15036


 50%|█████     | 1010/2000 [16:20<15:01,  1.10it/s]

Training_loss 22.13805


 51%|█████     | 1011/2000 [16:21<14:20,  1.15it/s]

Training_loss 22.32139


 51%|█████     | 1012/2000 [16:22<14:48,  1.11it/s]

Training_loss 22.11869


 51%|█████     | 1013/2000 [16:23<14:05,  1.17it/s]

Training_loss 22.08918


 51%|█████     | 1014/2000 [16:24<14:55,  1.10it/s]

Training_loss 21.99233


 51%|█████     | 1015/2000 [16:25<14:57,  1.10it/s]

Training_loss 22.18479


 51%|█████     | 1016/2000 [16:26<14:04,  1.17it/s]

Training_loss 22.38402


 51%|█████     | 1017/2000 [16:27<15:16,  1.07it/s]

Training_loss 22.62195


 51%|█████     | 1018/2000 [16:27<15:01,  1.09it/s]

Training_loss 22.75517


 51%|█████     | 1019/2000 [16:29<15:39,  1.04it/s]

Training_loss 22.80768


 51%|█████     | 1020/2000 [16:29<14:47,  1.10it/s]

Training_loss 22.97782


 51%|█████     | 1021/2000 [16:30<14:55,  1.09it/s]

Training_loss 22.96303


 51%|█████     | 1022/2000 [16:31<14:22,  1.13it/s]

Training_loss 22.67015


 51%|█████     | 1023/2000 [16:32<14:05,  1.16it/s]

Training_loss 22.79307


 51%|█████     | 1024/2000 [16:33<14:01,  1.16it/s]

Training_loss 23.15449


 51%|█████▏    | 1025/2000 [16:33<13:21,  1.22it/s]

Training_loss 23.02733


 51%|█████▏    | 1026/2000 [16:35<15:01,  1.08it/s]

Training_loss 22.78537


 51%|█████▏    | 1027/2000 [16:36<15:11,  1.07it/s]

Training_loss 22.71986


 51%|█████▏    | 1028/2000 [16:37<14:58,  1.08it/s]

Training_loss 22.43183


 51%|█████▏    | 1029/2000 [16:37<14:52,  1.09it/s]

Training_loss 22.19287


 52%|█████▏    | 1030/2000 [16:38<15:22,  1.05it/s]

Training_loss 22.50375


 52%|█████▏    | 1031/2000 [16:39<14:39,  1.10it/s]

Training_loss 22.50679


 52%|█████▏    | 1032/2000 [16:40<13:55,  1.16it/s]

Training_loss 22.51837


 52%|█████▏    | 1033/2000 [16:41<13:17,  1.21it/s]

Training_loss 22.66952


 52%|█████▏    | 1034/2000 [16:42<13:21,  1.21it/s]

Training_loss 22.76545


 52%|█████▏    | 1035/2000 [16:43<14:02,  1.15it/s]

Training_loss 22.96749


 52%|█████▏    | 1036/2000 [16:43<14:12,  1.13it/s]

Training_loss 23.15265


 52%|█████▏    | 1037/2000 [16:44<14:10,  1.13it/s]

Training_loss 23.52945


 52%|█████▏    | 1038/2000 [16:45<13:42,  1.17it/s]

Training_loss 23.31930


 52%|█████▏    | 1039/2000 [16:46<13:07,  1.22it/s]

Training_loss 23.58595


 52%|█████▏    | 1040/2000 [16:47<13:06,  1.22it/s]

Training_loss 23.42861


 52%|█████▏    | 1041/2000 [16:48<13:52,  1.15it/s]

Training_loss 23.44140


 52%|█████▏    | 1042/2000 [16:49<14:56,  1.07it/s]

Training_loss 23.32082


 52%|█████▏    | 1043/2000 [16:50<14:37,  1.09it/s]

Training_loss 23.33378


 52%|█████▏    | 1044/2000 [16:51<15:04,  1.06it/s]

Training_loss 23.11917


 52%|█████▏    | 1045/2000 [16:52<15:04,  1.06it/s]

Training_loss 22.82315


 52%|█████▏    | 1046/2000 [16:53<15:42,  1.01it/s]

Training_loss 22.68199


 52%|█████▏    | 1047/2000 [16:54<15:37,  1.02it/s]

Training_loss 22.66751


 52%|█████▏    | 1048/2000 [16:55<15:14,  1.04it/s]

Training_loss 22.41505


 52%|█████▏    | 1049/2000 [16:55<14:33,  1.09it/s]

Training_loss 22.81878


 52%|█████▎    | 1050/2000 [16:56<14:46,  1.07it/s]

Training_loss 22.81932


 53%|█████▎    | 1051/2000 [16:57<14:04,  1.12it/s]

Training_loss 22.78263


 53%|█████▎    | 1052/2000 [16:58<13:50,  1.14it/s]

Training_loss 22.66693


 53%|█████▎    | 1053/2000 [16:59<13:44,  1.15it/s]

Training_loss 22.84191


 53%|█████▎    | 1054/2000 [17:00<15:39,  1.01it/s]

Training_loss 22.65471


 53%|█████▎    | 1055/2000 [17:01<15:02,  1.05it/s]

Training_loss 22.96741


 53%|█████▎    | 1056/2000 [17:02<14:10,  1.11it/s]

Training_loss 23.12697


 53%|█████▎    | 1057/2000 [17:03<14:37,  1.07it/s]

Training_loss 22.85376


 53%|█████▎    | 1058/2000 [17:04<14:52,  1.06it/s]

Training_loss 23.10529


 53%|█████▎    | 1059/2000 [17:05<14:02,  1.12it/s]

Training_loss 23.10740


 53%|█████▎    | 1060/2000 [17:05<14:11,  1.10it/s]

Training_loss 23.41959


 53%|█████▎    | 1061/2000 [17:06<14:31,  1.08it/s]

Training_loss 23.26764


 53%|█████▎    | 1062/2000 [17:07<14:21,  1.09it/s]

Training_loss 23.37474


 53%|█████▎    | 1063/2000 [17:08<14:40,  1.06it/s]

Training_loss 23.29290


 53%|█████▎    | 1064/2000 [17:09<14:05,  1.11it/s]

Training_loss 23.27353


 53%|█████▎    | 1065/2000 [17:10<13:42,  1.14it/s]

Training_loss 23.39490


 53%|█████▎    | 1066/2000 [17:11<14:34,  1.07it/s]

Training_loss 23.50268


 53%|█████▎    | 1067/2000 [17:12<14:43,  1.06it/s]

Training_loss 23.33815


 53%|█████▎    | 1068/2000 [17:13<14:34,  1.07it/s]

Training_loss 23.42056


 53%|█████▎    | 1069/2000 [17:14<13:59,  1.11it/s]

Training_loss 23.51075


 54%|█████▎    | 1070/2000 [17:15<14:59,  1.03it/s]

Training_loss 23.47435


 54%|█████▎    | 1071/2000 [17:16<14:40,  1.05it/s]

Training_loss 23.49799


 54%|█████▎    | 1072/2000 [17:17<14:14,  1.09it/s]

Training_loss 23.39585


 54%|█████▎    | 1073/2000 [17:17<13:47,  1.12it/s]

Training_loss 23.38351


 54%|█████▎    | 1074/2000 [17:19<14:33,  1.06it/s]

Training_loss 23.49032


 54%|█████▍    | 1075/2000 [17:19<13:34,  1.14it/s]

Training_loss 23.26623


 54%|█████▍    | 1076/2000 [17:20<13:11,  1.17it/s]

Training_loss 23.17414


 54%|█████▍    | 1077/2000 [17:21<12:34,  1.22it/s]

Training_loss 23.06083


 54%|█████▍    | 1078/2000 [17:22<12:55,  1.19it/s]

Training_loss 23.17205


 54%|█████▍    | 1079/2000 [17:23<14:28,  1.06it/s]

Training_loss 23.26354


 54%|█████▍    | 1080/2000 [17:24<14:05,  1.09it/s]

Training_loss 23.47148


 54%|█████▍    | 1081/2000 [17:25<13:57,  1.10it/s]

Training_loss 23.44203


 54%|█████▍    | 1082/2000 [17:25<13:41,  1.12it/s]

Training_loss 23.50582


 54%|█████▍    | 1083/2000 [17:26<12:55,  1.18it/s]

Training_loss 23.19537


 54%|█████▍    | 1084/2000 [17:27<13:36,  1.12it/s]

Training_loss 23.06308


 54%|█████▍    | 1085/2000 [17:28<13:24,  1.14it/s]

Training_loss 23.20781


 54%|█████▍    | 1086/2000 [17:29<12:50,  1.19it/s]

Training_loss 23.04037


 54%|█████▍    | 1087/2000 [17:30<12:41,  1.20it/s]

Training_loss 23.05224


 54%|█████▍    | 1088/2000 [17:31<13:39,  1.11it/s]

Training_loss 22.99362


 54%|█████▍    | 1089/2000 [17:32<15:20,  1.01s/it]

Training_loss 23.01038


 55%|█████▍    | 1090/2000 [17:33<15:13,  1.00s/it]

Training_loss 23.04694


 55%|█████▍    | 1091/2000 [17:34<14:32,  1.04it/s]

Training_loss 23.23291


 55%|█████▍    | 1092/2000 [17:35<13:49,  1.10it/s]

Training_loss 23.47790


 55%|█████▍    | 1093/2000 [17:35<13:28,  1.12it/s]

Training_loss 23.34451


 55%|█████▍    | 1094/2000 [17:36<13:04,  1.15it/s]

Training_loss 23.86774


 55%|█████▍    | 1095/2000 [17:37<14:35,  1.03it/s]

Training_loss 23.76963


 55%|█████▍    | 1096/2000 [17:38<14:25,  1.04it/s]

Training_loss 23.63844


 55%|█████▍    | 1097/2000 [17:39<14:31,  1.04it/s]

Training_loss 23.82797


 55%|█████▍    | 1098/2000 [17:40<14:44,  1.02it/s]

Training_loss 23.40862


 55%|█████▍    | 1099/2000 [17:41<13:57,  1.08it/s]

Training_loss 23.57400


 55%|█████▌    | 1100/2000 [17:42<14:04,  1.07it/s]

Training_loss 23.70633


 55%|█████▌    | 1101/2000 [17:43<14:14,  1.05it/s]

Training_loss 23.72137


 55%|█████▌    | 1102/2000 [17:44<13:50,  1.08it/s]

Training_loss 24.11748


 55%|█████▌    | 1103/2000 [17:45<13:31,  1.11it/s]

Training_loss 23.92296


 55%|█████▌    | 1104/2000 [17:46<13:55,  1.07it/s]

Training_loss 23.92313


 55%|█████▌    | 1105/2000 [17:47<13:07,  1.14it/s]

Training_loss 23.59646


 55%|█████▌    | 1106/2000 [17:47<12:37,  1.18it/s]

Training_loss 24.00262


 55%|█████▌    | 1107/2000 [17:48<12:57,  1.15it/s]

Training_loss 23.92815


 55%|█████▌    | 1108/2000 [17:49<12:19,  1.21it/s]

Training_loss 23.93300


 55%|█████▌    | 1109/2000 [17:50<12:31,  1.19it/s]

Training_loss 23.76263


 56%|█████▌    | 1110/2000 [17:51<12:03,  1.23it/s]

Training_loss 23.81422


 56%|█████▌    | 1111/2000 [17:51<11:46,  1.26it/s]

Training_loss 23.67729


 56%|█████▌    | 1112/2000 [17:53<13:48,  1.07it/s]

Training_loss 23.55917


 56%|█████▌    | 1113/2000 [17:54<14:16,  1.04it/s]

Training_loss 23.68148


 56%|█████▌    | 1114/2000 [17:55<13:58,  1.06it/s]

Training_loss 23.62369


 56%|█████▌    | 1115/2000 [17:55<13:11,  1.12it/s]

Training_loss 23.62075


 56%|█████▌    | 1116/2000 [17:56<12:15,  1.20it/s]

Training_loss 23.80531


 56%|█████▌    | 1117/2000 [17:57<12:14,  1.20it/s]

Training_loss 23.68980


 56%|█████▌    | 1118/2000 [17:58<12:46,  1.15it/s]

Training_loss 23.53471


 56%|█████▌    | 1119/2000 [17:59<12:54,  1.14it/s]

Training_loss 23.78985


 56%|█████▌    | 1120/2000 [18:00<12:21,  1.19it/s]

Training_loss 23.56325


 56%|█████▌    | 1121/2000 [18:00<12:48,  1.14it/s]

Training_loss 23.63070


 56%|█████▌    | 1122/2000 [18:01<13:00,  1.13it/s]

Training_loss 23.60276


 56%|█████▌    | 1123/2000 [18:02<13:39,  1.07it/s]

Training_loss 23.63685


 56%|█████▌    | 1124/2000 [18:03<14:17,  1.02it/s]

Training_loss 23.43059


 56%|█████▋    | 1125/2000 [18:04<14:13,  1.03it/s]

Training_loss 23.36013


 56%|█████▋    | 1126/2000 [18:05<14:03,  1.04it/s]

Training_loss 23.12736


 56%|█████▋    | 1127/2000 [18:06<14:01,  1.04it/s]

Training_loss 23.53444


 56%|█████▋    | 1128/2000 [18:07<14:05,  1.03it/s]

Training_loss 23.22365


 56%|█████▋    | 1129/2000 [18:08<14:01,  1.03it/s]

Training_loss 23.02510


 56%|█████▋    | 1130/2000 [18:09<14:51,  1.02s/it]

Training_loss 23.40726


 57%|█████▋    | 1131/2000 [18:10<14:07,  1.03it/s]

Training_loss 23.63055


 57%|█████▋    | 1132/2000 [18:11<13:31,  1.07it/s]

Training_loss 23.30395


 57%|█████▋    | 1133/2000 [18:12<12:49,  1.13it/s]

Training_loss 23.15624


 57%|█████▋    | 1134/2000 [18:13<12:31,  1.15it/s]

Training_loss 23.00262


 57%|█████▋    | 1135/2000 [18:14<12:35,  1.14it/s]

Training_loss 22.69358


 57%|█████▋    | 1136/2000 [18:15<13:02,  1.10it/s]

Training_loss 22.58747


 57%|█████▋    | 1137/2000 [18:16<14:11,  1.01it/s]

Training_loss 22.74736


 57%|█████▋    | 1138/2000 [18:17<13:46,  1.04it/s]

Training_loss 23.10157


 57%|█████▋    | 1139/2000 [18:17<12:57,  1.11it/s]

Training_loss 23.14924


 57%|█████▋    | 1140/2000 [18:18<13:13,  1.08it/s]

Training_loss 23.07621


 57%|█████▋    | 1141/2000 [18:19<12:38,  1.13it/s]

Training_loss 22.88456


 57%|█████▋    | 1142/2000 [18:20<13:02,  1.10it/s]

Training_loss 23.09343


 57%|█████▋    | 1143/2000 [18:21<13:37,  1.05it/s]

Training_loss 23.21966


 57%|█████▋    | 1144/2000 [18:22<14:25,  1.01s/it]

Training_loss 23.38203


 57%|█████▋    | 1145/2000 [18:23<13:48,  1.03it/s]

Training_loss 23.42938


 57%|█████▋    | 1146/2000 [18:24<12:57,  1.10it/s]

Training_loss 23.47296


 57%|█████▋    | 1147/2000 [18:25<13:11,  1.08it/s]

Training_loss 23.34080


 57%|█████▋    | 1148/2000 [18:26<14:45,  1.04s/it]

Training_loss 23.41772


 57%|█████▋    | 1149/2000 [18:27<14:21,  1.01s/it]

Training_loss 23.37686


 57%|█████▊    | 1150/2000 [18:28<14:43,  1.04s/it]

Training_loss 23.24020


 58%|█████▊    | 1151/2000 [18:29<13:36,  1.04it/s]

Training_loss 23.30732


 58%|█████▊    | 1152/2000 [18:30<12:50,  1.10it/s]

Training_loss 23.19064


 58%|█████▊    | 1153/2000 [18:31<12:15,  1.15it/s]

Training_loss 23.32768


 58%|█████▊    | 1154/2000 [18:32<12:56,  1.09it/s]

Training_loss 23.27075


 58%|█████▊    | 1155/2000 [18:33<15:35,  1.11s/it]

Training_loss 23.40373


 58%|█████▊    | 1156/2000 [18:34<15:09,  1.08s/it]

Training_loss 23.68072


 58%|█████▊    | 1157/2000 [18:35<15:34,  1.11s/it]

Training_loss 23.51430


 58%|█████▊    | 1158/2000 [18:37<15:25,  1.10s/it]

Training_loss 23.69899


 58%|█████▊    | 1159/2000 [18:38<15:19,  1.09s/it]

Training_loss 23.74043


 58%|█████▊    | 1160/2000 [18:38<13:51,  1.01it/s]

Training_loss 23.98439


 58%|█████▊    | 1161/2000 [18:39<13:59,  1.00s/it]

Training_loss 23.74949


 58%|█████▊    | 1162/2000 [18:40<13:24,  1.04it/s]

Training_loss 23.91429


 58%|█████▊    | 1163/2000 [18:41<13:58,  1.00s/it]

Training_loss 24.08992


 58%|█████▊    | 1164/2000 [18:42<13:23,  1.04it/s]

Training_loss 24.34962


 58%|█████▊    | 1165/2000 [18:43<13:36,  1.02it/s]

Training_loss 24.26274


 58%|█████▊    | 1166/2000 [18:44<13:47,  1.01it/s]

Training_loss 24.22179


 58%|█████▊    | 1167/2000 [18:45<12:46,  1.09it/s]

Training_loss 24.53084


 58%|█████▊    | 1168/2000 [18:46<12:31,  1.11it/s]

Training_loss 24.22470


 58%|█████▊    | 1169/2000 [18:47<11:58,  1.16it/s]

Training_loss 24.22840


 58%|█████▊    | 1170/2000 [18:48<12:13,  1.13it/s]

Training_loss 23.96879


 59%|█████▊    | 1171/2000 [18:48<12:09,  1.14it/s]

Training_loss 23.93247


 59%|█████▊    | 1172/2000 [18:49<12:09,  1.13it/s]

Training_loss 24.10988


 59%|█████▊    | 1173/2000 [18:50<12:38,  1.09it/s]

Training_loss 23.92234


 59%|█████▊    | 1174/2000 [18:51<12:39,  1.09it/s]

Training_loss 23.88802


 59%|█████▉    | 1175/2000 [18:52<12:27,  1.10it/s]

Training_loss 24.28992


 59%|█████▉    | 1176/2000 [18:53<12:21,  1.11it/s]

Training_loss 24.33615


 59%|█████▉    | 1177/2000 [18:54<11:51,  1.16it/s]

Training_loss 24.34193


 59%|█████▉    | 1178/2000 [18:55<11:15,  1.22it/s]

Training_loss 24.12011


 59%|█████▉    | 1179/2000 [18:55<11:11,  1.22it/s]

Training_loss 24.04923


 59%|█████▉    | 1180/2000 [18:56<11:37,  1.18it/s]

Training_loss 23.82087


 59%|█████▉    | 1181/2000 [18:57<11:49,  1.15it/s]

Training_loss 23.82436


 59%|█████▉    | 1182/2000 [18:58<12:54,  1.06it/s]

Training_loss 23.91527


 59%|█████▉    | 1183/2000 [18:59<12:25,  1.10it/s]

Training_loss 23.84696


 59%|█████▉    | 1184/2000 [19:00<12:36,  1.08it/s]

Training_loss 23.58167


 59%|█████▉    | 1185/2000 [19:01<12:04,  1.13it/s]

Training_loss 23.77296


 59%|█████▉    | 1186/2000 [19:02<11:48,  1.15it/s]

Training_loss 23.66178


 59%|█████▉    | 1187/2000 [19:03<11:34,  1.17it/s]

Training_loss 23.69628


 59%|█████▉    | 1188/2000 [19:03<11:10,  1.21it/s]

Training_loss 23.75117


 59%|█████▉    | 1189/2000 [19:04<11:56,  1.13it/s]

Training_loss 23.51599


 60%|█████▉    | 1190/2000 [19:05<11:31,  1.17it/s]

Training_loss 23.49146


 60%|█████▉    | 1191/2000 [19:06<11:20,  1.19it/s]

Training_loss 23.71782


 60%|█████▉    | 1192/2000 [19:07<12:39,  1.06it/s]

Training_loss 23.79464


 60%|█████▉    | 1193/2000 [19:08<12:49,  1.05it/s]

Training_loss 23.78261


 60%|█████▉    | 1194/2000 [19:09<12:43,  1.06it/s]

Training_loss 23.69271


 60%|█████▉    | 1195/2000 [19:10<11:53,  1.13it/s]

Training_loss 23.49794


 60%|█████▉    | 1196/2000 [19:11<12:59,  1.03it/s]

Training_loss 23.59016


 60%|█████▉    | 1197/2000 [19:12<13:10,  1.02it/s]

Training_loss 23.77039


 60%|█████▉    | 1198/2000 [19:13<14:23,  1.08s/it]

Training_loss 23.70147


 60%|█████▉    | 1199/2000 [19:14<14:38,  1.10s/it]

Training_loss 23.68757


 60%|██████    | 1200/2000 [19:15<13:33,  1.02s/it]

Training_loss 23.64599


 60%|██████    | 1201/2000 [19:16<12:58,  1.03it/s]

Training_loss 23.58683


 60%|██████    | 1202/2000 [19:17<12:15,  1.09it/s]

Training_loss 23.50928


 60%|██████    | 1203/2000 [19:18<12:35,  1.05it/s]

Training_loss 23.52313


 60%|██████    | 1204/2000 [19:19<12:54,  1.03it/s]

Training_loss 23.33821


 60%|██████    | 1205/2000 [19:20<13:06,  1.01it/s]

Training_loss 23.21412


 60%|██████    | 1206/2000 [19:21<13:39,  1.03s/it]

Training_loss 23.15504


 60%|██████    | 1207/2000 [19:22<13:43,  1.04s/it]

Training_loss 22.98384


 60%|██████    | 1208/2000 [19:23<13:14,  1.00s/it]

Training_loss 23.27423


 60%|██████    | 1209/2000 [19:24<12:32,  1.05it/s]

Training_loss 23.27218


 60%|██████    | 1210/2000 [19:25<12:00,  1.10it/s]

Training_loss 23.13738


 61%|██████    | 1211/2000 [19:25<11:21,  1.16it/s]

Training_loss 23.22501


 61%|██████    | 1212/2000 [19:26<11:20,  1.16it/s]

Training_loss 23.34272


 61%|██████    | 1213/2000 [19:27<11:37,  1.13it/s]

Training_loss 23.41533


 61%|██████    | 1214/2000 [19:28<11:12,  1.17it/s]

Training_loss 23.37654


 61%|██████    | 1215/2000 [19:29<10:58,  1.19it/s]

Training_loss 23.35814


 61%|██████    | 1216/2000 [19:30<10:59,  1.19it/s]

Training_loss 23.44205


 61%|██████    | 1217/2000 [19:31<11:37,  1.12it/s]

Training_loss 23.41175


 61%|██████    | 1218/2000 [19:31<11:13,  1.16it/s]

Training_loss 23.32589


 61%|██████    | 1219/2000 [19:32<11:21,  1.15it/s]

Training_loss 23.27448


 61%|██████    | 1220/2000 [19:33<11:06,  1.17it/s]

Training_loss 23.25566


 61%|██████    | 1221/2000 [19:34<10:40,  1.22it/s]

Training_loss 23.42130


 61%|██████    | 1222/2000 [19:35<10:33,  1.23it/s]

Training_loss 23.66053


 61%|██████    | 1223/2000 [19:36<10:29,  1.23it/s]

Training_loss 23.62236


 61%|██████    | 1224/2000 [19:36<10:24,  1.24it/s]

Training_loss 23.52210


 61%|██████▏   | 1225/2000 [19:37<11:19,  1.14it/s]

Training_loss 23.60339


 61%|██████▏   | 1226/2000 [19:38<10:54,  1.18it/s]

Training_loss 23.59958


 61%|██████▏   | 1227/2000 [19:39<11:41,  1.10it/s]

Training_loss 23.57040


 61%|██████▏   | 1228/2000 [19:40<11:00,  1.17it/s]

Training_loss 23.72182


 61%|██████▏   | 1229/2000 [19:41<11:15,  1.14it/s]

Training_loss 23.67034


 62%|██████▏   | 1230/2000 [19:42<10:43,  1.20it/s]

Training_loss 23.90633


 62%|██████▏   | 1231/2000 [19:42<10:47,  1.19it/s]

Training_loss 23.86192


 62%|██████▏   | 1232/2000 [19:43<11:07,  1.15it/s]

Training_loss 23.51717


 62%|██████▏   | 1233/2000 [19:44<11:44,  1.09it/s]

Training_loss 23.63451


 62%|██████▏   | 1234/2000 [19:45<11:30,  1.11it/s]

Training_loss 23.75820


 62%|██████▏   | 1235/2000 [19:46<11:48,  1.08it/s]

Training_loss 23.81308


 62%|██████▏   | 1236/2000 [19:47<11:18,  1.13it/s]

Training_loss 23.83377


 62%|██████▏   | 1237/2000 [19:48<11:27,  1.11it/s]

Training_loss 24.24634


 62%|██████▏   | 1238/2000 [19:49<11:41,  1.09it/s]

Training_loss 24.36005


 62%|██████▏   | 1239/2000 [19:50<11:49,  1.07it/s]

Training_loss 24.32132


 62%|██████▏   | 1240/2000 [19:51<11:03,  1.14it/s]

Training_loss 23.81642


 62%|██████▏   | 1241/2000 [19:52<10:51,  1.17it/s]

Training_loss 23.87047


 62%|██████▏   | 1242/2000 [19:53<11:28,  1.10it/s]

Training_loss 24.00708


 62%|██████▏   | 1243/2000 [19:53<11:21,  1.11it/s]

Training_loss 23.93858


 62%|██████▏   | 1244/2000 [19:54<11:52,  1.06it/s]

Training_loss 23.86885


 62%|██████▏   | 1245/2000 [19:55<11:47,  1.07it/s]

Training_loss 23.83240


 62%|██████▏   | 1246/2000 [19:56<11:15,  1.12it/s]

Training_loss 23.50888


 62%|██████▏   | 1247/2000 [19:57<10:34,  1.19it/s]

Training_loss 23.19450


 62%|██████▏   | 1248/2000 [19:58<10:46,  1.16it/s]

Training_loss 23.16700


 62%|██████▏   | 1249/2000 [19:59<10:43,  1.17it/s]

Training_loss 23.10924


 62%|██████▎   | 1250/2000 [19:59<10:09,  1.23it/s]

Training_loss 23.20963


 63%|██████▎   | 1251/2000 [20:00<09:44,  1.28it/s]

Training_loss 23.39012


 63%|██████▎   | 1252/2000 [20:01<09:48,  1.27it/s]

Training_loss 23.41652


 63%|██████▎   | 1253/2000 [20:02<09:40,  1.29it/s]

Training_loss 23.50004


 63%|██████▎   | 1254/2000 [20:02<09:57,  1.25it/s]

Training_loss 23.63406


 63%|██████▎   | 1255/2000 [20:03<09:54,  1.25it/s]

Training_loss 23.75897


 63%|██████▎   | 1256/2000 [20:04<10:24,  1.19it/s]

Training_loss 23.71772


 63%|██████▎   | 1257/2000 [20:05<10:02,  1.23it/s]

Training_loss 23.83949


 63%|██████▎   | 1258/2000 [20:06<10:26,  1.19it/s]

Training_loss 23.84406


 63%|██████▎   | 1259/2000 [20:07<11:20,  1.09it/s]

Training_loss 23.98624


 63%|██████▎   | 1260/2000 [20:08<11:33,  1.07it/s]

Training_loss 23.95907


 63%|██████▎   | 1261/2000 [20:09<11:13,  1.10it/s]

Training_loss 24.10133


 63%|██████▎   | 1262/2000 [20:10<11:33,  1.06it/s]

Training_loss 24.11519


 63%|██████▎   | 1263/2000 [20:11<11:05,  1.11it/s]

Training_loss 23.95235


 63%|██████▎   | 1264/2000 [20:12<11:33,  1.06it/s]

Training_loss 23.92647


 63%|██████▎   | 1265/2000 [20:12<11:05,  1.10it/s]

Training_loss 24.01428


 63%|██████▎   | 1266/2000 [20:13<11:02,  1.11it/s]

Training_loss 24.21124


 63%|██████▎   | 1267/2000 [20:14<11:36,  1.05it/s]

Training_loss 24.17288


 63%|██████▎   | 1268/2000 [20:15<11:57,  1.02it/s]

Training_loss 24.16437


 63%|██████▎   | 1269/2000 [20:16<11:45,  1.04it/s]

Training_loss 24.42760


 64%|██████▎   | 1270/2000 [20:18<12:40,  1.04s/it]

Training_loss 24.40113


 64%|██████▎   | 1271/2000 [20:19<12:15,  1.01s/it]

Training_loss 24.05867


 64%|██████▎   | 1272/2000 [20:19<11:22,  1.07it/s]

Training_loss 23.95610


 64%|██████▎   | 1273/2000 [20:20<11:12,  1.08it/s]

Training_loss 23.95544


 64%|██████▎   | 1274/2000 [20:21<10:41,  1.13it/s]

Training_loss 23.84597


 64%|██████▍   | 1275/2000 [20:22<12:05,  1.00s/it]

Training_loss 23.59331


 64%|██████▍   | 1276/2000 [20:23<12:05,  1.00s/it]

Training_loss 23.29517


 64%|██████▍   | 1277/2000 [20:24<11:09,  1.08it/s]

Training_loss 23.52696


 64%|██████▍   | 1278/2000 [20:25<10:43,  1.12it/s]

Training_loss 23.55504


 64%|██████▍   | 1279/2000 [20:26<10:27,  1.15it/s]

Training_loss 23.45441


 64%|██████▍   | 1280/2000 [20:27<10:35,  1.13it/s]

Training_loss 23.44249


 64%|██████▍   | 1281/2000 [20:28<11:04,  1.08it/s]

Training_loss 23.47835


 64%|██████▍   | 1282/2000 [20:29<11:03,  1.08it/s]

Training_loss 23.48858


 64%|██████▍   | 1283/2000 [20:29<10:57,  1.09it/s]

Training_loss 23.26949


 64%|██████▍   | 1284/2000 [20:30<11:17,  1.06it/s]

Training_loss 23.21132


 64%|██████▍   | 1285/2000 [20:31<10:30,  1.13it/s]

Training_loss 23.41755


 64%|██████▍   | 1286/2000 [20:32<09:56,  1.20it/s]

Training_loss 23.36296


 64%|██████▍   | 1287/2000 [20:33<10:00,  1.19it/s]

Training_loss 23.23326


 64%|██████▍   | 1288/2000 [20:34<10:06,  1.17it/s]

Training_loss 23.23888


 64%|██████▍   | 1289/2000 [20:34<09:53,  1.20it/s]

Training_loss 23.13808


 64%|██████▍   | 1290/2000 [20:36<10:44,  1.10it/s]

Training_loss 23.23419


 65%|██████▍   | 1291/2000 [20:36<10:57,  1.08it/s]

Training_loss 23.10837


 65%|██████▍   | 1292/2000 [20:37<10:58,  1.08it/s]

Training_loss 23.23964


 65%|██████▍   | 1293/2000 [20:38<10:43,  1.10it/s]

Training_loss 23.32039


 65%|██████▍   | 1294/2000 [20:39<11:20,  1.04it/s]

Training_loss 23.39078


 65%|██████▍   | 1295/2000 [20:40<10:41,  1.10it/s]

Training_loss 23.41044


 65%|██████▍   | 1296/2000 [20:41<11:18,  1.04it/s]

Training_loss 23.52983


 65%|██████▍   | 1297/2000 [20:42<11:14,  1.04it/s]

Training_loss 23.48211


 65%|██████▍   | 1298/2000 [20:43<11:06,  1.05it/s]

Training_loss 23.58449


 65%|██████▍   | 1299/2000 [20:44<10:21,  1.13it/s]

Training_loss 23.88511


 65%|██████▌   | 1300/2000 [20:45<10:25,  1.12it/s]

Training_loss 23.96674


 65%|██████▌   | 1301/2000 [20:46<09:52,  1.18it/s]

Training_loss 23.91449


 65%|██████▌   | 1302/2000 [20:47<10:44,  1.08it/s]

Training_loss 23.74992


 65%|██████▌   | 1303/2000 [20:48<10:54,  1.06it/s]

Training_loss 23.59302


 65%|██████▌   | 1304/2000 [20:48<10:14,  1.13it/s]

Training_loss 23.75532


 65%|██████▌   | 1305/2000 [20:49<09:40,  1.20it/s]

Training_loss 23.77457


 65%|██████▌   | 1306/2000 [20:50<09:20,  1.24it/s]

Training_loss 23.61027


 65%|██████▌   | 1307/2000 [20:51<09:34,  1.21it/s]

Training_loss 23.71777


 65%|██████▌   | 1308/2000 [20:51<09:24,  1.23it/s]

Training_loss 23.86057


 65%|██████▌   | 1309/2000 [20:52<09:29,  1.21it/s]

Training_loss 23.60052


 66%|██████▌   | 1310/2000 [20:53<09:14,  1.24it/s]

Training_loss 23.72270


 66%|██████▌   | 1311/2000 [20:54<09:50,  1.17it/s]

Training_loss 23.68438


 66%|██████▌   | 1312/2000 [20:55<09:31,  1.20it/s]

Training_loss 23.53194


 66%|██████▌   | 1313/2000 [20:56<09:38,  1.19it/s]

Training_loss 23.54514


 66%|██████▌   | 1314/2000 [20:57<09:34,  1.19it/s]

Training_loss 23.64805


 66%|██████▌   | 1315/2000 [20:57<09:20,  1.22it/s]

Training_loss 23.42874


 66%|██████▌   | 1316/2000 [20:58<10:19,  1.10it/s]

Training_loss 23.48716


 66%|██████▌   | 1317/2000 [20:59<09:53,  1.15it/s]

Training_loss 23.84492


 66%|██████▌   | 1318/2000 [21:00<10:20,  1.10it/s]

Training_loss 23.85439


 66%|██████▌   | 1319/2000 [21:01<10:01,  1.13it/s]

Training_loss 24.11641


 66%|██████▌   | 1320/2000 [21:02<09:45,  1.16it/s]

Training_loss 23.82586


 66%|██████▌   | 1321/2000 [21:03<09:37,  1.18it/s]

Training_loss 23.98809


 66%|██████▌   | 1322/2000 [21:04<09:53,  1.14it/s]

Training_loss 23.98637


 66%|██████▌   | 1323/2000 [21:04<09:51,  1.14it/s]

Training_loss 23.81493


 66%|██████▌   | 1324/2000 [21:05<10:25,  1.08it/s]

Training_loss 23.76331


 66%|██████▋   | 1325/2000 [21:06<10:38,  1.06it/s]

Training_loss 23.72377


 66%|██████▋   | 1326/2000 [21:07<10:04,  1.11it/s]

Training_loss 23.77747


 66%|██████▋   | 1327/2000 [21:08<10:21,  1.08it/s]

Training_loss 23.86815


 66%|██████▋   | 1328/2000 [21:09<10:33,  1.06it/s]

Training_loss 23.80339


 66%|██████▋   | 1329/2000 [21:10<10:00,  1.12it/s]

Training_loss 23.83024


 66%|██████▋   | 1330/2000 [21:11<09:33,  1.17it/s]

Training_loss 24.04566


 67%|██████▋   | 1331/2000 [21:12<09:42,  1.15it/s]

Training_loss 24.21072


 67%|██████▋   | 1332/2000 [21:13<10:12,  1.09it/s]

Training_loss 24.19464


 67%|██████▋   | 1333/2000 [21:14<10:00,  1.11it/s]

Training_loss 24.15329


 67%|██████▋   | 1334/2000 [21:15<10:15,  1.08it/s]

Training_loss 23.83761


 67%|██████▋   | 1335/2000 [21:15<10:04,  1.10it/s]

Training_loss 23.94591


 67%|██████▋   | 1336/2000 [21:16<10:10,  1.09it/s]

Training_loss 23.79926


 67%|██████▋   | 1337/2000 [21:17<10:34,  1.04it/s]

Training_loss 23.77883


 67%|██████▋   | 1338/2000 [21:18<10:41,  1.03it/s]

Training_loss 23.91093


 67%|██████▋   | 1339/2000 [21:19<10:04,  1.09it/s]

Training_loss 24.19935


 67%|██████▋   | 1340/2000 [21:20<09:49,  1.12it/s]

Training_loss 24.36703


 67%|██████▋   | 1341/2000 [21:21<09:24,  1.17it/s]

Training_loss 24.34604


 67%|██████▋   | 1342/2000 [21:22<10:05,  1.09it/s]

Training_loss 24.46054


 67%|██████▋   | 1343/2000 [21:23<10:58,  1.00s/it]

Training_loss 24.38813


 67%|██████▋   | 1344/2000 [21:25<13:38,  1.25s/it]

Training_loss 24.32491


 67%|██████▋   | 1345/2000 [21:26<14:40,  1.34s/it]

Training_loss 24.26918


 67%|██████▋   | 1346/2000 [21:28<14:55,  1.37s/it]

Training_loss 24.35915


 67%|██████▋   | 1347/2000 [21:29<14:11,  1.30s/it]

Training_loss 24.44216


 67%|██████▋   | 1348/2000 [21:30<14:08,  1.30s/it]

Training_loss 24.33956


 67%|██████▋   | 1349/2000 [21:32<14:00,  1.29s/it]

Training_loss 24.44943


 68%|██████▊   | 1350/2000 [21:33<13:11,  1.22s/it]

Training_loss 24.76045


 68%|██████▊   | 1351/2000 [21:34<12:19,  1.14s/it]

Training_loss 24.96396


 68%|██████▊   | 1352/2000 [21:35<12:22,  1.15s/it]

Training_loss 25.00109


 68%|██████▊   | 1353/2000 [21:36<11:34,  1.07s/it]

Training_loss 24.86950


 68%|██████▊   | 1354/2000 [21:37<11:23,  1.06s/it]

Training_loss 24.75291


 68%|██████▊   | 1355/2000 [21:38<11:54,  1.11s/it]

Training_loss 24.80487


 68%|██████▊   | 1356/2000 [21:39<11:50,  1.10s/it]

Training_loss 24.89342


 68%|██████▊   | 1357/2000 [21:40<12:12,  1.14s/it]

Training_loss 24.93626


 68%|██████▊   | 1358/2000 [21:42<12:57,  1.21s/it]

Training_loss 25.03640


 68%|██████▊   | 1359/2000 [21:43<12:26,  1.16s/it]

Training_loss 24.99281


 68%|██████▊   | 1360/2000 [21:44<11:53,  1.12s/it]

Training_loss 25.12823


 68%|██████▊   | 1361/2000 [21:45<11:49,  1.11s/it]

Training_loss 24.74228


 68%|██████▊   | 1362/2000 [21:46<11:41,  1.10s/it]

Training_loss 24.66592


 68%|██████▊   | 1363/2000 [21:47<11:39,  1.10s/it]

Training_loss 24.41721


 68%|██████▊   | 1364/2000 [21:48<11:15,  1.06s/it]

Training_loss 24.54453


 68%|██████▊   | 1365/2000 [21:49<10:58,  1.04s/it]

Training_loss 24.44451


 68%|██████▊   | 1366/2000 [21:50<10:21,  1.02it/s]

Training_loss 24.59788


 68%|██████▊   | 1367/2000 [21:51<09:59,  1.06it/s]

Training_loss 24.56699


 68%|██████▊   | 1368/2000 [21:52<09:57,  1.06it/s]

Training_loss 24.70366


 68%|██████▊   | 1369/2000 [21:53<10:12,  1.03it/s]

Training_loss 24.58973


 68%|██████▊   | 1370/2000 [21:54<10:56,  1.04s/it]

Training_loss 24.83862


 69%|██████▊   | 1371/2000 [21:55<11:59,  1.14s/it]

Training_loss 24.80079


 69%|██████▊   | 1372/2000 [21:57<12:46,  1.22s/it]

Training_loss 24.58523


 69%|██████▊   | 1373/2000 [21:58<13:18,  1.27s/it]

Training_loss 24.77951


 69%|██████▊   | 1374/2000 [21:59<13:10,  1.26s/it]

Training_loss 24.87470


 69%|██████▉   | 1375/2000 [22:00<12:43,  1.22s/it]

Training_loss 24.62413


 69%|██████▉   | 1376/2000 [22:01<11:58,  1.15s/it]

Training_loss 24.67070


 69%|██████▉   | 1377/2000 [22:02<11:20,  1.09s/it]

Training_loss 24.74364


 69%|██████▉   | 1378/2000 [22:03<11:18,  1.09s/it]

Training_loss 24.99454


 69%|██████▉   | 1379/2000 [22:04<11:17,  1.09s/it]

Training_loss 24.75836


 69%|██████▉   | 1380/2000 [22:06<11:20,  1.10s/it]

Training_loss 25.00501


 69%|██████▉   | 1381/2000 [22:07<11:34,  1.12s/it]

Training_loss 25.19754


 69%|██████▉   | 1382/2000 [22:08<10:49,  1.05s/it]

Training_loss 25.04762


 69%|██████▉   | 1383/2000 [22:09<10:37,  1.03s/it]

Training_loss 24.97458


 69%|██████▉   | 1384/2000 [22:10<10:34,  1.03s/it]

Training_loss 25.05510


 69%|██████▉   | 1385/2000 [22:11<10:16,  1.00s/it]

Training_loss 25.10440


 69%|██████▉   | 1386/2000 [22:12<10:13,  1.00it/s]

Training_loss 24.81902


 69%|██████▉   | 1387/2000 [22:13<10:28,  1.03s/it]

Training_loss 24.73142


 69%|██████▉   | 1388/2000 [22:14<11:25,  1.12s/it]

Training_loss 24.73834


 69%|██████▉   | 1389/2000 [22:15<11:20,  1.11s/it]

Training_loss 24.59157


 70%|██████▉   | 1390/2000 [22:16<11:36,  1.14s/it]

Training_loss 24.58201


 70%|██████▉   | 1391/2000 [22:17<11:33,  1.14s/it]

Training_loss 24.79187


 70%|██████▉   | 1392/2000 [22:19<11:36,  1.14s/it]

Training_loss 25.22930


 70%|██████▉   | 1393/2000 [22:20<11:47,  1.17s/it]

Training_loss 25.14311


 70%|██████▉   | 1394/2000 [22:21<11:35,  1.15s/it]

Training_loss 25.45187


 70%|██████▉   | 1395/2000 [22:22<11:23,  1.13s/it]

Training_loss 25.46544


 70%|██████▉   | 1396/2000 [22:23<11:36,  1.15s/it]

Training_loss 25.19641


 70%|██████▉   | 1397/2000 [22:24<11:29,  1.14s/it]

Training_loss 25.18586


 70%|██████▉   | 1398/2000 [22:25<11:26,  1.14s/it]

Training_loss 25.09424


 70%|██████▉   | 1399/2000 [22:27<11:27,  1.14s/it]

Training_loss 24.90801


 70%|███████   | 1400/2000 [22:28<11:26,  1.14s/it]

Training_loss 25.16272


 70%|███████   | 1401/2000 [22:29<11:57,  1.20s/it]

Training_loss 25.04985


 70%|███████   | 1402/2000 [22:30<12:31,  1.26s/it]

Training_loss 24.85226


 70%|███████   | 1403/2000 [22:32<12:12,  1.23s/it]

Training_loss 24.86401


 70%|███████   | 1404/2000 [22:33<11:56,  1.20s/it]

Training_loss 24.97179


 70%|███████   | 1405/2000 [22:34<11:42,  1.18s/it]

Training_loss 24.86464


 70%|███████   | 1406/2000 [22:35<11:41,  1.18s/it]

Training_loss 24.93134


 70%|███████   | 1407/2000 [22:36<11:26,  1.16s/it]

Training_loss 25.31562


 70%|███████   | 1408/2000 [22:37<11:26,  1.16s/it]

Training_loss 25.37528


 70%|███████   | 1409/2000 [22:39<11:27,  1.16s/it]

Training_loss 25.16129


 70%|███████   | 1410/2000 [22:40<11:40,  1.19s/it]

Training_loss 24.84452


 71%|███████   | 1411/2000 [22:41<11:29,  1.17s/it]

Training_loss 25.08438


 71%|███████   | 1412/2000 [22:42<11:57,  1.22s/it]

Training_loss 25.16064


 71%|███████   | 1413/2000 [22:44<12:28,  1.28s/it]

Training_loss 24.77630


 71%|███████   | 1414/2000 [22:45<13:13,  1.35s/it]

Training_loss 24.85597


 71%|███████   | 1415/2000 [22:47<13:17,  1.36s/it]

Training_loss 24.91806


 71%|███████   | 1416/2000 [22:48<12:41,  1.30s/it]

Training_loss 24.74168


 71%|███████   | 1417/2000 [22:49<12:14,  1.26s/it]

Training_loss 24.55100


 71%|███████   | 1418/2000 [22:50<11:54,  1.23s/it]

Training_loss 24.23964


 71%|███████   | 1419/2000 [22:51<11:38,  1.20s/it]

Training_loss 24.41303


 71%|███████   | 1420/2000 [22:52<11:26,  1.18s/it]

Training_loss 24.44035


 71%|███████   | 1421/2000 [22:53<11:11,  1.16s/it]

Training_loss 24.60246


 71%|███████   | 1422/2000 [22:54<10:48,  1.12s/it]

Training_loss 24.59653


 71%|███████   | 1423/2000 [22:55<10:29,  1.09s/it]

Training_loss 24.56049


 71%|███████   | 1424/2000 [22:57<10:33,  1.10s/it]

Training_loss 24.50240


 71%|███████▏  | 1425/2000 [22:58<10:32,  1.10s/it]

Training_loss 24.64876


 71%|███████▏  | 1426/2000 [22:59<10:28,  1.10s/it]

Training_loss 24.96409


 71%|███████▏  | 1427/2000 [23:00<10:28,  1.10s/it]

Training_loss 24.88916


 71%|███████▏  | 1428/2000 [23:01<10:58,  1.15s/it]

Training_loss 24.99863


 71%|███████▏  | 1429/2000 [23:02<11:03,  1.16s/it]

Training_loss 25.42166


 72%|███████▏  | 1430/2000 [23:04<11:08,  1.17s/it]

Training_loss 25.15237


 72%|███████▏  | 1431/2000 [23:05<11:15,  1.19s/it]

Training_loss 25.26309


 72%|███████▏  | 1432/2000 [23:06<10:43,  1.13s/it]

Training_loss 25.38411


 72%|███████▏  | 1433/2000 [23:07<10:36,  1.12s/it]

Training_loss 25.40891


 72%|███████▏  | 1434/2000 [23:08<10:27,  1.11s/it]

Training_loss 25.45860


 72%|███████▏  | 1435/2000 [23:09<10:40,  1.13s/it]

Training_loss 25.42591


 72%|███████▏  | 1436/2000 [23:10<10:32,  1.12s/it]

Training_loss 25.52680


 72%|███████▏  | 1437/2000 [23:11<10:36,  1.13s/it]

Training_loss 25.76698


 72%|███████▏  | 1438/2000 [23:13<10:32,  1.13s/it]

Training_loss 25.66111


 72%|███████▏  | 1439/2000 [23:14<10:51,  1.16s/it]

Training_loss 25.94965


 72%|███████▏  | 1440/2000 [23:15<10:49,  1.16s/it]

Training_loss 26.14125


 72%|███████▏  | 1441/2000 [23:16<10:41,  1.15s/it]

Training_loss 26.26065


 72%|███████▏  | 1442/2000 [23:17<10:36,  1.14s/it]

Training_loss 25.78672


 72%|███████▏  | 1443/2000 [23:18<10:46,  1.16s/it]

Training_loss 25.63799


 72%|███████▏  | 1444/2000 [23:19<10:11,  1.10s/it]

Training_loss 25.18495


 72%|███████▏  | 1445/2000 [23:20<09:26,  1.02s/it]

Training_loss 25.31791


 72%|███████▏  | 1446/2000 [23:21<09:01,  1.02it/s]

Training_loss 25.46988


 72%|███████▏  | 1447/2000 [23:22<09:31,  1.03s/it]

Training_loss 25.51637


 72%|███████▏  | 1448/2000 [23:23<09:01,  1.02it/s]

Training_loss 25.81096


 72%|███████▏  | 1449/2000 [23:24<08:57,  1.03it/s]

Training_loss 25.67268


 72%|███████▎  | 1450/2000 [23:25<08:31,  1.08it/s]

Training_loss 25.67118


 73%|███████▎  | 1451/2000 [23:26<08:31,  1.07it/s]

Training_loss 25.81043


 73%|███████▎  | 1452/2000 [23:27<08:06,  1.13it/s]

Training_loss 25.87866


 73%|███████▎  | 1453/2000 [23:27<07:43,  1.18it/s]

Training_loss 25.83017


 73%|███████▎  | 1454/2000 [23:28<07:30,  1.21it/s]

Training_loss 25.79713


 73%|███████▎  | 1455/2000 [23:29<08:23,  1.08it/s]

Training_loss 25.84283


 73%|███████▎  | 1456/2000 [23:30<08:32,  1.06it/s]

Training_loss 26.15866


 73%|███████▎  | 1457/2000 [23:31<08:18,  1.09it/s]

Training_loss 25.89930


 73%|███████▎  | 1458/2000 [23:32<08:15,  1.09it/s]

Training_loss 25.62365


 73%|███████▎  | 1459/2000 [23:33<08:04,  1.12it/s]

Training_loss 25.68514


 73%|███████▎  | 1460/2000 [23:34<08:15,  1.09it/s]

Training_loss 25.65854


 73%|███████▎  | 1461/2000 [23:35<08:36,  1.04it/s]

Training_loss 25.35084


 73%|███████▎  | 1462/2000 [23:36<08:08,  1.10it/s]

Training_loss 25.52286


 73%|███████▎  | 1463/2000 [23:37<08:13,  1.09it/s]

Training_loss 25.41887


 73%|███████▎  | 1464/2000 [23:38<08:19,  1.07it/s]

Training_loss 25.43382


 73%|███████▎  | 1465/2000 [23:39<08:22,  1.06it/s]

Training_loss 25.51038


 73%|███████▎  | 1466/2000 [23:39<08:11,  1.09it/s]

Training_loss 25.07181


 73%|███████▎  | 1467/2000 [23:40<07:50,  1.13it/s]

Training_loss 25.17305


 73%|███████▎  | 1468/2000 [23:41<07:20,  1.21it/s]

Training_loss 24.98023


 73%|███████▎  | 1469/2000 [23:42<07:53,  1.12it/s]

Training_loss 25.26962


 74%|███████▎  | 1470/2000 [23:43<08:09,  1.08it/s]

Training_loss 25.05941


 74%|███████▎  | 1471/2000 [23:44<08:35,  1.03it/s]

Training_loss 24.91806


 74%|███████▎  | 1472/2000 [23:45<08:25,  1.04it/s]

Training_loss 25.21290


 74%|███████▎  | 1473/2000 [23:46<08:01,  1.09it/s]

Training_loss 25.23257


 74%|███████▎  | 1474/2000 [23:47<07:50,  1.12it/s]

Training_loss 25.11067


 74%|███████▍  | 1475/2000 [23:48<07:50,  1.12it/s]

Training_loss 24.86135


 74%|███████▍  | 1476/2000 [23:48<07:31,  1.16it/s]

Training_loss 24.59398


 74%|███████▍  | 1477/2000 [23:49<07:15,  1.20it/s]

Training_loss 24.87545


 74%|███████▍  | 1478/2000 [23:50<07:39,  1.14it/s]

Training_loss 25.02832


 74%|███████▍  | 1479/2000 [23:51<07:57,  1.09it/s]

Training_loss 24.94965


 74%|███████▍  | 1480/2000 [23:52<07:52,  1.10it/s]

Training_loss 24.85872


 74%|███████▍  | 1481/2000 [23:53<08:05,  1.07it/s]

Training_loss 24.82191


 74%|███████▍  | 1482/2000 [23:54<07:43,  1.12it/s]

Training_loss 24.71075


 74%|███████▍  | 1483/2000 [23:55<07:50,  1.10it/s]

Training_loss 24.39420


 74%|███████▍  | 1484/2000 [23:55<07:28,  1.15it/s]

Training_loss 24.17197


 74%|███████▍  | 1485/2000 [23:56<07:15,  1.18it/s]

Training_loss 24.14884


 74%|███████▍  | 1486/2000 [23:57<07:07,  1.20it/s]

Training_loss 24.10505


 74%|███████▍  | 1487/2000 [23:58<07:32,  1.13it/s]

Training_loss 24.22870


 74%|███████▍  | 1488/2000 [23:59<07:10,  1.19it/s]

Training_loss 24.25590


 74%|███████▍  | 1489/2000 [24:00<06:57,  1.22it/s]

Training_loss 24.35949


 74%|███████▍  | 1490/2000 [24:00<07:12,  1.18it/s]

Training_loss 24.55082


 75%|███████▍  | 1491/2000 [24:01<06:48,  1.25it/s]

Training_loss 24.62308


 75%|███████▍  | 1492/2000 [24:02<07:10,  1.18it/s]

Training_loss 24.88077


 75%|███████▍  | 1493/2000 [24:03<07:00,  1.21it/s]

Training_loss 24.55393


 75%|███████▍  | 1494/2000 [24:04<07:57,  1.06it/s]

Training_loss 24.54900


 75%|███████▍  | 1495/2000 [24:05<07:45,  1.09it/s]

Training_loss 24.39239


 75%|███████▍  | 1496/2000 [24:06<07:29,  1.12it/s]

Training_loss 24.51164


 75%|███████▍  | 1497/2000 [24:07<07:18,  1.15it/s]

Training_loss 24.42642


 75%|███████▍  | 1498/2000 [24:07<06:59,  1.20it/s]

Training_loss 24.38475


 75%|███████▍  | 1499/2000 [24:08<07:20,  1.14it/s]

Training_loss 24.75369


 75%|███████▌  | 1500/2000 [24:09<07:09,  1.16it/s]

Training_loss 24.66475


 75%|███████▌  | 1501/2000 [24:10<07:26,  1.12it/s]

Training_loss 24.85579


 75%|███████▌  | 1502/2000 [24:11<07:31,  1.10it/s]

Training_loss 24.89703


 75%|███████▌  | 1503/2000 [24:12<07:16,  1.14it/s]

Training_loss 24.77479


 75%|███████▌  | 1504/2000 [24:13<07:07,  1.16it/s]

Training_loss 24.66679


 75%|███████▌  | 1505/2000 [24:14<07:12,  1.14it/s]

Training_loss 24.53336


 75%|███████▌  | 1506/2000 [24:15<08:08,  1.01it/s]

Training_loss 24.62195


 75%|███████▌  | 1507/2000 [24:16<07:56,  1.03it/s]

Training_loss 24.69005


 75%|███████▌  | 1508/2000 [24:17<07:30,  1.09it/s]

Training_loss 24.67195


 75%|███████▌  | 1509/2000 [24:17<07:10,  1.14it/s]

Training_loss 24.72131


 76%|███████▌  | 1510/2000 [24:18<07:02,  1.16it/s]

Training_loss 24.71331


 76%|███████▌  | 1511/2000 [24:19<07:19,  1.11it/s]

Training_loss 24.66555


 76%|███████▌  | 1512/2000 [24:20<07:38,  1.06it/s]

Training_loss 24.95581


 76%|███████▌  | 1513/2000 [24:21<07:45,  1.05it/s]

Training_loss 25.02516


 76%|███████▌  | 1514/2000 [24:22<07:42,  1.05it/s]

Training_loss 25.04105


 76%|███████▌  | 1515/2000 [24:23<07:47,  1.04it/s]

Training_loss 25.12763


 76%|███████▌  | 1516/2000 [24:24<07:59,  1.01it/s]

Training_loss 24.96375


 76%|███████▌  | 1517/2000 [24:25<07:33,  1.06it/s]

Training_loss 25.00454


 76%|███████▌  | 1518/2000 [24:26<07:11,  1.12it/s]

Training_loss 24.91480


 76%|███████▌  | 1519/2000 [24:27<07:24,  1.08it/s]

Training_loss 24.71094


 76%|███████▌  | 1520/2000 [24:28<07:29,  1.07it/s]

Training_loss 24.86855


 76%|███████▌  | 1521/2000 [24:29<07:38,  1.05it/s]

Training_loss 24.75429


 76%|███████▌  | 1522/2000 [24:30<08:06,  1.02s/it]

Training_loss 24.80760


 76%|███████▌  | 1523/2000 [24:31<07:34,  1.05it/s]

Training_loss 25.00697


 76%|███████▌  | 1524/2000 [24:31<06:57,  1.14it/s]

Training_loss 25.06574


 76%|███████▋  | 1525/2000 [24:32<06:56,  1.14it/s]

Training_loss 25.09107


 76%|███████▋  | 1526/2000 [24:33<07:29,  1.05it/s]

Training_loss 25.16399


 76%|███████▋  | 1527/2000 [24:35<08:00,  1.01s/it]

Training_loss 24.97906


 76%|███████▋  | 1528/2000 [24:36<08:48,  1.12s/it]

Training_loss 25.35990


 76%|███████▋  | 1529/2000 [24:37<08:09,  1.04s/it]

Training_loss 24.94799


 76%|███████▋  | 1530/2000 [24:38<07:42,  1.02it/s]

Training_loss 24.90663


 77%|███████▋  | 1531/2000 [24:39<07:58,  1.02s/it]

Training_loss 24.92969


 77%|███████▋  | 1532/2000 [24:40<07:59,  1.02s/it]

Training_loss 24.98970


 77%|███████▋  | 1533/2000 [24:41<07:43,  1.01it/s]

Training_loss 24.57057


 77%|███████▋  | 1534/2000 [24:42<07:32,  1.03it/s]

Training_loss 24.72958


 77%|███████▋  | 1535/2000 [24:43<07:16,  1.07it/s]

Training_loss 24.56006


 77%|███████▋  | 1536/2000 [24:43<07:12,  1.07it/s]

Training_loss 24.70981


 77%|███████▋  | 1537/2000 [24:44<06:48,  1.13it/s]

Training_loss 24.44892


 77%|███████▋  | 1538/2000 [24:45<07:02,  1.09it/s]

Training_loss 24.64675


 77%|███████▋  | 1539/2000 [24:46<06:45,  1.14it/s]

Training_loss 24.50109


 77%|███████▋  | 1540/2000 [24:47<07:37,  1.01it/s]

Training_loss 24.69501


 77%|███████▋  | 1541/2000 [24:48<07:12,  1.06it/s]

Training_loss 24.76268


 77%|███████▋  | 1542/2000 [24:49<07:33,  1.01it/s]

Training_loss 24.69718


 77%|███████▋  | 1543/2000 [24:50<07:34,  1.01it/s]

Training_loss 24.78121


 77%|███████▋  | 1544/2000 [24:51<07:14,  1.05it/s]

Training_loss 24.77330


 77%|███████▋  | 1545/2000 [24:53<08:29,  1.12s/it]

Training_loss 24.78348


 77%|███████▋  | 1546/2000 [24:54<08:52,  1.17s/it]

Training_loss 24.71097


 77%|███████▋  | 1547/2000 [24:55<08:14,  1.09s/it]

Training_loss 24.67462


 77%|███████▋  | 1548/2000 [24:56<07:49,  1.04s/it]

Training_loss 24.56096


 77%|███████▋  | 1549/2000 [24:57<07:32,  1.00s/it]

Training_loss 24.29979


 78%|███████▊  | 1550/2000 [24:58<07:21,  1.02it/s]

Training_loss 24.64493


 78%|███████▊  | 1551/2000 [24:59<07:26,  1.01it/s]

Training_loss 24.37244


 78%|███████▊  | 1552/2000 [24:59<07:05,  1.05it/s]

Training_loss 24.41083


 78%|███████▊  | 1553/2000 [25:00<06:56,  1.07it/s]

Training_loss 24.60842


 78%|███████▊  | 1554/2000 [25:01<06:35,  1.13it/s]

Training_loss 24.68758


 78%|███████▊  | 1555/2000 [25:02<06:33,  1.13it/s]

Training_loss 24.75256


 78%|███████▊  | 1556/2000 [25:03<06:34,  1.13it/s]

Training_loss 24.99630


 78%|███████▊  | 1557/2000 [25:04<08:05,  1.10s/it]

Training_loss 24.77442


 78%|███████▊  | 1558/2000 [25:06<08:05,  1.10s/it]

Training_loss 24.95899


 78%|███████▊  | 1559/2000 [25:07<08:13,  1.12s/it]

Training_loss 24.79662


 78%|███████▊  | 1560/2000 [25:08<08:04,  1.10s/it]

Training_loss 24.88430


 78%|███████▊  | 1561/2000 [25:09<07:43,  1.06s/it]

Training_loss 24.48048


 78%|███████▊  | 1562/2000 [25:10<07:49,  1.07s/it]

Training_loss 24.41145


 78%|███████▊  | 1563/2000 [25:11<07:55,  1.09s/it]

Training_loss 24.41240


 78%|███████▊  | 1564/2000 [25:12<07:42,  1.06s/it]

Training_loss 23.90324


 78%|███████▊  | 1565/2000 [25:13<07:28,  1.03s/it]

Training_loss 24.12538


 78%|███████▊  | 1566/2000 [25:14<07:27,  1.03s/it]

Training_loss 24.37366


 78%|███████▊  | 1567/2000 [25:15<07:34,  1.05s/it]

Training_loss 24.22098


 78%|███████▊  | 1568/2000 [25:16<07:27,  1.04s/it]

Training_loss 24.42097


 78%|███████▊  | 1569/2000 [25:17<07:22,  1.03s/it]

Training_loss 24.23598


 78%|███████▊  | 1570/2000 [25:18<07:24,  1.03s/it]

Training_loss 24.28130


 79%|███████▊  | 1571/2000 [25:19<06:59,  1.02it/s]

Training_loss 24.42144


 79%|███████▊  | 1572/2000 [25:20<07:23,  1.04s/it]

Training_loss 24.48130


 79%|███████▊  | 1573/2000 [25:21<07:07,  1.00s/it]

Training_loss 24.80820


 79%|███████▊  | 1574/2000 [25:22<07:39,  1.08s/it]

Training_loss 25.03802


 79%|███████▉  | 1575/2000 [25:23<07:40,  1.08s/it]

Training_loss 25.15529


 79%|███████▉  | 1576/2000 [25:24<07:18,  1.04s/it]

Training_loss 25.11271


 79%|███████▉  | 1577/2000 [25:25<06:46,  1.04it/s]

Training_loss 25.28239


 79%|███████▉  | 1578/2000 [25:26<06:34,  1.07it/s]

Training_loss 25.19036


 79%|███████▉  | 1579/2000 [25:27<06:32,  1.07it/s]

Training_loss 25.00906


 79%|███████▉  | 1580/2000 [25:28<06:24,  1.09it/s]

Training_loss 25.06255


 79%|███████▉  | 1581/2000 [25:29<06:47,  1.03it/s]

Training_loss 24.97749


 79%|███████▉  | 1582/2000 [25:30<06:23,  1.09it/s]

Training_loss 24.96511


 79%|███████▉  | 1583/2000 [25:31<06:35,  1.05it/s]

Training_loss 25.07893


 79%|███████▉  | 1584/2000 [25:32<07:06,  1.03s/it]

Training_loss 24.62625


 79%|███████▉  | 1585/2000 [25:33<06:25,  1.08it/s]

Training_loss 24.62597


 79%|███████▉  | 1586/2000 [25:34<06:49,  1.01it/s]

Training_loss 24.61074


 79%|███████▉  | 1587/2000 [25:35<06:28,  1.06it/s]

Training_loss 24.56385


 79%|███████▉  | 1588/2000 [25:36<06:54,  1.01s/it]

Training_loss 24.44419


 79%|███████▉  | 1589/2000 [25:37<06:32,  1.05it/s]

Training_loss 24.60041


 80%|███████▉  | 1590/2000 [25:37<06:21,  1.07it/s]

Training_loss 24.57891


 80%|███████▉  | 1591/2000 [25:38<06:11,  1.10it/s]

Training_loss 24.85798


 80%|███████▉  | 1592/2000 [25:39<06:16,  1.08it/s]

Training_loss 24.45849


 80%|███████▉  | 1593/2000 [25:41<07:06,  1.05s/it]

Training_loss 24.41595


 80%|███████▉  | 1594/2000 [25:41<06:38,  1.02it/s]

Training_loss 24.60765


 80%|███████▉  | 1595/2000 [25:42<06:37,  1.02it/s]

Training_loss 24.83492


 80%|███████▉  | 1596/2000 [25:43<06:36,  1.02it/s]

Training_loss 24.90924


 80%|███████▉  | 1597/2000 [25:44<06:39,  1.01it/s]

Training_loss 25.01517


 80%|███████▉  | 1598/2000 [25:45<06:34,  1.02it/s]

Training_loss 24.76027


 80%|███████▉  | 1599/2000 [25:46<06:33,  1.02it/s]

Training_loss 24.75379


 80%|████████  | 1600/2000 [25:47<06:17,  1.06it/s]

Training_loss 24.81649


 80%|████████  | 1601/2000 [25:48<06:45,  1.02s/it]

Training_loss 25.02150


 80%|████████  | 1602/2000 [25:49<06:20,  1.05it/s]

Training_loss 25.22579


 80%|████████  | 1603/2000 [25:50<06:05,  1.08it/s]

Training_loss 25.08832


 80%|████████  | 1604/2000 [25:51<06:14,  1.06it/s]

Training_loss 24.90215


 80%|████████  | 1605/2000 [25:52<05:47,  1.14it/s]

Training_loss 24.94273


 80%|████████  | 1606/2000 [25:53<06:02,  1.09it/s]

Training_loss 25.04259


 80%|████████  | 1607/2000 [25:54<06:23,  1.02it/s]

Training_loss 24.70177


 80%|████████  | 1608/2000 [25:55<05:53,  1.11it/s]

Training_loss 24.86142


 80%|████████  | 1609/2000 [25:56<06:03,  1.08it/s]

Training_loss 25.18871


 80%|████████  | 1610/2000 [25:57<06:30,  1.00s/it]

Training_loss 25.00505


 81%|████████  | 1611/2000 [25:58<06:24,  1.01it/s]

Training_loss 25.24755


 81%|████████  | 1612/2000 [25:59<06:51,  1.06s/it]

Training_loss 25.25546


 81%|████████  | 1613/2000 [26:00<06:35,  1.02s/it]

Training_loss 24.97600


 81%|████████  | 1614/2000 [26:01<06:09,  1.05it/s]

Training_loss 24.96974


 81%|████████  | 1615/2000 [26:02<06:15,  1.03it/s]

Training_loss 24.96932


 81%|████████  | 1616/2000 [26:03<06:14,  1.03it/s]

Training_loss 25.11456


 81%|████████  | 1617/2000 [26:04<06:07,  1.04it/s]

Training_loss 24.79854


 81%|████████  | 1618/2000 [26:05<06:20,  1.00it/s]

Training_loss 24.55734


 81%|████████  | 1619/2000 [26:06<06:14,  1.02it/s]

Training_loss 24.59492


 81%|████████  | 1620/2000 [26:06<05:56,  1.07it/s]

Training_loss 24.51150


 81%|████████  | 1621/2000 [26:07<05:33,  1.13it/s]

Training_loss 24.38886


 81%|████████  | 1622/2000 [26:08<05:51,  1.08it/s]

Training_loss 24.34999


 81%|████████  | 1623/2000 [26:09<05:35,  1.12it/s]

Training_loss 24.71242


 81%|████████  | 1624/2000 [26:10<05:16,  1.19it/s]

Training_loss 24.58149


 81%|████████▏ | 1625/2000 [26:11<05:42,  1.10it/s]

Training_loss 24.68244


 81%|████████▏ | 1626/2000 [26:12<05:43,  1.09it/s]

Training_loss 24.49561


 81%|████████▏ | 1627/2000 [26:13<05:45,  1.08it/s]

Training_loss 24.44517


 81%|████████▏ | 1628/2000 [26:14<05:49,  1.07it/s]

Training_loss 24.46811


 81%|████████▏ | 1629/2000 [26:15<06:08,  1.01it/s]

Training_loss 24.32647


 82%|████████▏ | 1630/2000 [26:16<06:03,  1.02it/s]

Training_loss 24.02864


 82%|████████▏ | 1631/2000 [26:17<06:03,  1.01it/s]

Training_loss 24.60133


 82%|████████▏ | 1632/2000 [26:18<05:46,  1.06it/s]

Training_loss 24.80665


 82%|████████▏ | 1633/2000 [26:18<05:37,  1.09it/s]

Training_loss 24.60984


 82%|████████▏ | 1634/2000 [26:19<05:48,  1.05it/s]

Training_loss 24.55348


 82%|████████▏ | 1635/2000 [26:21<06:26,  1.06s/it]

Training_loss 24.37439


 82%|████████▏ | 1636/2000 [26:22<06:14,  1.03s/it]

Training_loss 24.19633


 82%|████████▏ | 1637/2000 [26:23<06:43,  1.11s/it]

Training_loss 24.49678


 82%|████████▏ | 1638/2000 [26:24<06:37,  1.10s/it]

Training_loss 24.50502


 82%|████████▏ | 1639/2000 [26:25<06:01,  1.00s/it]

Training_loss 24.78732


 82%|████████▏ | 1640/2000 [26:26<05:54,  1.01it/s]

Training_loss 24.81599


 82%|████████▏ | 1641/2000 [26:27<06:00,  1.01s/it]

Training_loss 24.59865


 82%|████████▏ | 1642/2000 [26:28<05:40,  1.05it/s]

Training_loss 24.49793


 82%|████████▏ | 1643/2000 [26:29<05:34,  1.07it/s]

Training_loss 24.45130


 82%|████████▏ | 1644/2000 [26:30<05:29,  1.08it/s]

Training_loss 24.52231


 82%|████████▏ | 1645/2000 [26:30<05:19,  1.11it/s]

Training_loss 24.63675


 82%|████████▏ | 1646/2000 [26:32<06:13,  1.06s/it]

Training_loss 24.76519


 82%|████████▏ | 1647/2000 [26:33<06:00,  1.02s/it]

Training_loss 24.63458


 82%|████████▏ | 1648/2000 [26:34<05:34,  1.05it/s]

Training_loss 24.62538


 82%|████████▏ | 1649/2000 [26:35<05:46,  1.01it/s]

Training_loss 24.66222


 82%|████████▎ | 1650/2000 [26:36<05:51,  1.00s/it]

Training_loss 24.37687


 83%|████████▎ | 1651/2000 [26:37<06:18,  1.08s/it]

Training_loss 24.69198


 83%|████████▎ | 1652/2000 [26:38<06:08,  1.06s/it]

Training_loss 24.72264


 83%|████████▎ | 1653/2000 [26:39<06:03,  1.05s/it]

Training_loss 25.04881


 83%|████████▎ | 1654/2000 [26:40<05:44,  1.00it/s]

Training_loss 25.40705


 83%|████████▎ | 1655/2000 [26:41<05:29,  1.05it/s]

Training_loss 25.29082


 83%|████████▎ | 1656/2000 [26:41<05:09,  1.11it/s]

Training_loss 24.86943


 83%|████████▎ | 1657/2000 [26:42<05:17,  1.08it/s]

Training_loss 24.97503


 83%|████████▎ | 1658/2000 [26:43<05:26,  1.05it/s]

Training_loss 24.82721


 83%|████████▎ | 1659/2000 [26:44<05:19,  1.07it/s]

Training_loss 24.66240


 83%|████████▎ | 1660/2000 [26:45<05:04,  1.12it/s]

Training_loss 24.53381


 83%|████████▎ | 1661/2000 [26:46<05:18,  1.06it/s]

Training_loss 24.59287


 83%|████████▎ | 1662/2000 [26:47<05:22,  1.05it/s]

Training_loss 24.53923


 83%|████████▎ | 1663/2000 [26:48<05:14,  1.07it/s]

Training_loss 24.73980


 83%|████████▎ | 1664/2000 [26:49<05:20,  1.05it/s]

Training_loss 24.70256


 83%|████████▎ | 1665/2000 [26:50<05:30,  1.01it/s]

Training_loss 24.36149


 83%|████████▎ | 1666/2000 [26:51<05:12,  1.07it/s]

Training_loss 24.27963


 83%|████████▎ | 1667/2000 [26:52<05:21,  1.04it/s]

Training_loss 24.54302


 83%|████████▎ | 1668/2000 [26:53<05:25,  1.02it/s]

Training_loss 24.55005


 83%|████████▎ | 1669/2000 [26:54<05:21,  1.03it/s]

Training_loss 24.60433


 84%|████████▎ | 1670/2000 [26:55<05:40,  1.03s/it]

Training_loss 24.57985


 84%|████████▎ | 1671/2000 [26:56<05:16,  1.04it/s]

Training_loss 24.42260


 84%|████████▎ | 1672/2000 [26:57<05:07,  1.07it/s]

Training_loss 24.41467


 84%|████████▎ | 1673/2000 [26:58<04:53,  1.11it/s]

Training_loss 24.47375


 84%|████████▎ | 1674/2000 [26:59<05:01,  1.08it/s]

Training_loss 24.37846


 84%|████████▍ | 1675/2000 [26:59<04:42,  1.15it/s]

Training_loss 24.48776


 84%|████████▍ | 1676/2000 [27:00<04:27,  1.21it/s]

Training_loss 24.47733


 84%|████████▍ | 1677/2000 [27:01<04:29,  1.20it/s]

Training_loss 24.41173


 84%|████████▍ | 1678/2000 [27:02<04:27,  1.20it/s]

Training_loss 24.56386


 84%|████████▍ | 1679/2000 [27:03<05:16,  1.01it/s]

Training_loss 24.33403


 84%|████████▍ | 1680/2000 [27:04<05:37,  1.05s/it]

Training_loss 24.16321


 84%|████████▍ | 1681/2000 [27:05<05:33,  1.04s/it]

Training_loss 24.53627


 84%|████████▍ | 1682/2000 [27:06<05:04,  1.05it/s]

Training_loss 24.59328


 84%|████████▍ | 1683/2000 [27:07<04:53,  1.08it/s]

Training_loss 24.47546


 84%|████████▍ | 1684/2000 [27:08<05:09,  1.02it/s]

Training_loss 24.66993


 84%|████████▍ | 1685/2000 [27:09<04:56,  1.06it/s]

Training_loss 24.64168


 84%|████████▍ | 1686/2000 [27:10<04:49,  1.08it/s]

Training_loss 24.83290


 84%|████████▍ | 1687/2000 [27:11<04:43,  1.10it/s]

Training_loss 24.98565


 84%|████████▍ | 1688/2000 [27:12<05:16,  1.01s/it]

Training_loss 25.08840


 84%|████████▍ | 1689/2000 [27:13<05:25,  1.05s/it]

Training_loss 24.94664


 84%|████████▍ | 1690/2000 [27:14<05:23,  1.04s/it]

Training_loss 25.26311


 85%|████████▍ | 1691/2000 [27:15<05:42,  1.11s/it]

Training_loss 25.16058


 85%|████████▍ | 1692/2000 [27:16<05:42,  1.11s/it]

Training_loss 25.25639


 85%|████████▍ | 1693/2000 [27:17<05:07,  1.00s/it]

Training_loss 25.41485


 85%|████████▍ | 1694/2000 [27:18<04:58,  1.03it/s]

Training_loss 25.35129


 85%|████████▍ | 1695/2000 [27:19<05:25,  1.07s/it]

Training_loss 25.59973


 85%|████████▍ | 1696/2000 [27:21<05:46,  1.14s/it]

Training_loss 25.43309


 85%|████████▍ | 1697/2000 [27:21<05:14,  1.04s/it]

Training_loss 25.51211


 85%|████████▍ | 1698/2000 [27:22<05:01,  1.00it/s]

Training_loss 25.48634


 85%|████████▍ | 1699/2000 [27:24<05:23,  1.08s/it]

Training_loss 25.27873


 85%|████████▌ | 1700/2000 [27:25<05:19,  1.06s/it]

Training_loss 25.03196


 85%|████████▌ | 1701/2000 [27:25<04:58,  1.00it/s]

Training_loss 24.91586


 85%|████████▌ | 1702/2000 [27:26<04:49,  1.03it/s]

Training_loss 24.61693


 85%|████████▌ | 1703/2000 [27:27<04:42,  1.05it/s]

Training_loss 24.85301


 85%|████████▌ | 1704/2000 [27:28<04:58,  1.01s/it]

Training_loss 25.04540


 85%|████████▌ | 1705/2000 [27:29<04:53,  1.00it/s]

Training_loss 24.81461


 85%|████████▌ | 1706/2000 [27:31<05:03,  1.03s/it]

Training_loss 24.68726


 85%|████████▌ | 1707/2000 [27:31<04:50,  1.01it/s]

Training_loss 24.84980


 85%|████████▌ | 1708/2000 [27:32<04:29,  1.08it/s]

Training_loss 24.77469


 85%|████████▌ | 1709/2000 [27:33<04:17,  1.13it/s]

Training_loss 25.24047


 86%|████████▌ | 1710/2000 [27:34<04:18,  1.12it/s]

Training_loss 24.99151


 86%|████████▌ | 1711/2000 [27:35<04:25,  1.09it/s]

Training_loss 24.91812


 86%|████████▌ | 1712/2000 [27:36<04:56,  1.03s/it]

Training_loss 24.70520


 86%|████████▌ | 1713/2000 [27:37<05:14,  1.10s/it]

Training_loss 24.70361


 86%|████████▌ | 1714/2000 [27:38<05:02,  1.06s/it]

Training_loss 24.65673


 86%|████████▌ | 1715/2000 [27:39<04:40,  1.02it/s]

Training_loss 24.54335


 86%|████████▌ | 1716/2000 [27:40<04:36,  1.03it/s]

Training_loss 24.63159


 86%|████████▌ | 1717/2000 [27:41<04:56,  1.05s/it]

Training_loss 24.52273


 86%|████████▌ | 1718/2000 [27:42<04:48,  1.02s/it]

Training_loss 24.39050


 86%|████████▌ | 1719/2000 [27:43<04:50,  1.04s/it]

Training_loss 24.61493


 86%|████████▌ | 1720/2000 [27:44<04:55,  1.05s/it]

Training_loss 24.42612


 86%|████████▌ | 1721/2000 [27:46<05:02,  1.08s/it]

Training_loss 24.43842


 86%|████████▌ | 1722/2000 [27:46<04:35,  1.01it/s]

Training_loss 24.06557


 86%|████████▌ | 1723/2000 [27:47<04:21,  1.06it/s]

Training_loss 24.12658


 86%|████████▌ | 1724/2000 [27:48<04:26,  1.04it/s]

Training_loss 23.92512


 86%|████████▋ | 1725/2000 [27:49<04:10,  1.10it/s]

Training_loss 24.08232


 86%|████████▋ | 1726/2000 [27:50<04:10,  1.09it/s]

Training_loss 24.23767


 86%|████████▋ | 1727/2000 [27:51<04:24,  1.03it/s]

Training_loss 24.16923


 86%|████████▋ | 1728/2000 [27:52<04:10,  1.08it/s]

Training_loss 23.92515


 86%|████████▋ | 1729/2000 [27:53<04:46,  1.06s/it]

Training_loss 24.20926


 86%|████████▋ | 1730/2000 [27:54<04:42,  1.05s/it]

Training_loss 24.23385


 87%|████████▋ | 1731/2000 [27:55<04:24,  1.02it/s]

Training_loss 24.52092


 87%|████████▋ | 1732/2000 [27:56<04:17,  1.04it/s]

Training_loss 24.74400


 87%|████████▋ | 1733/2000 [27:57<04:11,  1.06it/s]

Training_loss 24.71363


 87%|████████▋ | 1734/2000 [27:58<03:58,  1.12it/s]

Training_loss 24.86900


 87%|████████▋ | 1735/2000 [27:59<03:52,  1.14it/s]

Training_loss 24.66050


 87%|████████▋ | 1736/2000 [28:00<04:11,  1.05it/s]

Training_loss 24.65056


 87%|████████▋ | 1737/2000 [28:01<04:40,  1.06s/it]

Training_loss 24.47209


 87%|████████▋ | 1738/2000 [28:02<04:26,  1.02s/it]

Training_loss 24.50668


 87%|████████▋ | 1739/2000 [28:03<04:35,  1.06s/it]

Training_loss 24.41224


 87%|████████▋ | 1740/2000 [28:04<04:23,  1.01s/it]

Training_loss 24.29796


 87%|████████▋ | 1741/2000 [28:05<04:09,  1.04it/s]

Training_loss 24.36536


 87%|████████▋ | 1742/2000 [28:06<04:09,  1.03it/s]

Training_loss 24.50097


 87%|████████▋ | 1743/2000 [28:07<04:30,  1.05s/it]

Training_loss 24.12532


 87%|████████▋ | 1744/2000 [28:08<04:28,  1.05s/it]

Training_loss 23.93100


 87%|████████▋ | 1745/2000 [28:09<04:29,  1.06s/it]

Training_loss 23.74108


 87%|████████▋ | 1746/2000 [28:10<04:32,  1.07s/it]

Training_loss 23.66047


 87%|████████▋ | 1747/2000 [28:11<04:34,  1.08s/it]

Training_loss 23.55254


 87%|████████▋ | 1748/2000 [28:13<04:45,  1.13s/it]

Training_loss 23.52389


 87%|████████▋ | 1749/2000 [28:13<04:23,  1.05s/it]

Training_loss 23.88590


 88%|████████▊ | 1750/2000 [28:14<04:15,  1.02s/it]

Training_loss 24.15653


 88%|████████▊ | 1751/2000 [28:15<03:59,  1.04it/s]

Training_loss 24.15996


 88%|████████▊ | 1752/2000 [28:17<04:25,  1.07s/it]

Training_loss 24.00107


 88%|████████▊ | 1753/2000 [28:18<04:29,  1.09s/it]

Training_loss 23.72679


 88%|████████▊ | 1754/2000 [28:19<04:20,  1.06s/it]

Training_loss 23.71657


 88%|████████▊ | 1755/2000 [28:20<04:06,  1.01s/it]

Training_loss 23.87992


 88%|████████▊ | 1756/2000 [28:21<04:05,  1.01s/it]

Training_loss 24.10876


 88%|████████▊ | 1757/2000 [28:21<03:51,  1.05it/s]

Training_loss 24.15983


 88%|████████▊ | 1758/2000 [28:22<03:44,  1.08it/s]

Training_loss 23.91366


 88%|████████▊ | 1759/2000 [28:23<03:50,  1.04it/s]

Training_loss 24.30179


 88%|████████▊ | 1760/2000 [28:24<03:53,  1.03it/s]

Training_loss 24.62770


 88%|████████▊ | 1761/2000 [28:25<03:38,  1.09it/s]

Training_loss 24.54587


 88%|████████▊ | 1762/2000 [28:26<03:41,  1.07it/s]

Training_loss 24.19965


 88%|████████▊ | 1763/2000 [28:27<03:42,  1.06it/s]

Training_loss 24.33112


 88%|████████▊ | 1764/2000 [28:28<03:33,  1.10it/s]

Training_loss 24.53402


 88%|████████▊ | 1765/2000 [28:29<03:27,  1.13it/s]

Training_loss 24.83615


 88%|████████▊ | 1766/2000 [28:30<03:31,  1.11it/s]

Training_loss 24.65909


 88%|████████▊ | 1767/2000 [28:31<03:42,  1.05it/s]

Training_loss 24.57454


 88%|████████▊ | 1768/2000 [28:32<03:47,  1.02it/s]

Training_loss 24.64841


 88%|████████▊ | 1769/2000 [28:33<03:33,  1.08it/s]

Training_loss 24.66173


 88%|████████▊ | 1770/2000 [28:33<03:27,  1.11it/s]

Training_loss 24.84229


 89%|████████▊ | 1771/2000 [28:34<03:36,  1.06it/s]

Training_loss 24.68989


 89%|████████▊ | 1772/2000 [28:35<03:18,  1.15it/s]

Training_loss 24.84496


 89%|████████▊ | 1773/2000 [28:36<03:14,  1.17it/s]

Training_loss 24.81732


 89%|████████▊ | 1774/2000 [28:37<03:13,  1.17it/s]

Training_loss 24.93610


 89%|████████▉ | 1775/2000 [28:38<03:17,  1.14it/s]

Training_loss 24.80916


 89%|████████▉ | 1776/2000 [28:39<03:14,  1.15it/s]

Training_loss 24.84243


 89%|████████▉ | 1777/2000 [28:39<03:07,  1.19it/s]

Training_loss 24.76457


 89%|████████▉ | 1778/2000 [28:40<03:05,  1.20it/s]

Training_loss 24.54690


 89%|████████▉ | 1779/2000 [28:41<03:16,  1.13it/s]

Training_loss 24.46360


 89%|████████▉ | 1780/2000 [28:42<03:24,  1.08it/s]

Training_loss 24.55899


 89%|████████▉ | 1781/2000 [28:43<03:26,  1.06it/s]

Training_loss 24.31903


 89%|████████▉ | 1782/2000 [28:44<03:22,  1.07it/s]

Training_loss 24.23894


 89%|████████▉ | 1783/2000 [28:45<03:37,  1.00s/it]

Training_loss 24.34321


 89%|████████▉ | 1784/2000 [28:46<03:42,  1.03s/it]

Training_loss 24.37677


 89%|████████▉ | 1785/2000 [28:47<03:28,  1.03it/s]

Training_loss 24.30590


 89%|████████▉ | 1786/2000 [28:48<03:29,  1.02it/s]

Training_loss 24.33566


 89%|████████▉ | 1787/2000 [28:49<03:22,  1.05it/s]

Training_loss 24.04574


 89%|████████▉ | 1788/2000 [28:50<03:20,  1.06it/s]

Training_loss 24.24362


 89%|████████▉ | 1789/2000 [28:51<03:20,  1.05it/s]

Training_loss 23.92381


 90%|████████▉ | 1790/2000 [28:52<03:13,  1.08it/s]

Training_loss 24.07614


 90%|████████▉ | 1791/2000 [28:53<03:09,  1.10it/s]

Training_loss 24.11788


 90%|████████▉ | 1792/2000 [28:54<03:03,  1.14it/s]

Training_loss 24.25023


 90%|████████▉ | 1793/2000 [28:54<03:06,  1.11it/s]

Training_loss 24.09623


 90%|████████▉ | 1794/2000 [28:56<03:14,  1.06it/s]

Training_loss 23.77534


 90%|████████▉ | 1795/2000 [28:56<03:05,  1.11it/s]

Training_loss 23.66379


 90%|████████▉ | 1796/2000 [28:57<03:09,  1.08it/s]

Training_loss 23.97502


 90%|████████▉ | 1797/2000 [28:58<02:55,  1.15it/s]

Training_loss 23.93387


 90%|████████▉ | 1798/2000 [28:59<02:59,  1.13it/s]

Training_loss 23.84505


 90%|████████▉ | 1799/2000 [29:00<03:07,  1.07it/s]

Training_loss 23.88525


 90%|█████████ | 1800/2000 [29:01<03:27,  1.04s/it]

Training_loss 23.97855


 90%|█████████ | 1801/2000 [29:02<03:34,  1.08s/it]

Training_loss 23.95704


 90%|█████████ | 1802/2000 [29:04<03:35,  1.09s/it]

Training_loss 24.23864


 90%|█████████ | 1803/2000 [29:05<03:32,  1.08s/it]

Training_loss 23.85472


 90%|█████████ | 1804/2000 [29:06<03:22,  1.03s/it]

Training_loss 24.27987


 90%|█████████ | 1805/2000 [29:07<03:38,  1.12s/it]

Training_loss 24.50456


 90%|█████████ | 1806/2000 [29:08<03:26,  1.06s/it]

Training_loss 24.53286


 90%|█████████ | 1807/2000 [29:09<03:12,  1.00it/s]

Training_loss 24.55592


 90%|█████████ | 1808/2000 [29:10<03:11,  1.00it/s]

Training_loss 24.63457


 90%|█████████ | 1809/2000 [29:11<03:17,  1.03s/it]

Training_loss 24.57790


 90%|█████████ | 1810/2000 [29:12<03:15,  1.03s/it]

Training_loss 24.75078


 91%|█████████ | 1811/2000 [29:13<03:16,  1.04s/it]

Training_loss 24.88429


 91%|█████████ | 1812/2000 [29:14<03:08,  1.01s/it]

Training_loss 24.86324


 91%|█████████ | 1813/2000 [29:15<03:00,  1.04it/s]

Training_loss 24.73208


 91%|█████████ | 1814/2000 [29:16<03:03,  1.01it/s]

Training_loss 24.60916


 91%|█████████ | 1815/2000 [29:17<03:21,  1.09s/it]

Training_loss 24.54126


 91%|█████████ | 1816/2000 [29:18<03:13,  1.05s/it]

Training_loss 24.51096


 91%|█████████ | 1817/2000 [29:19<03:15,  1.07s/it]

Training_loss 24.53728


 91%|█████████ | 1818/2000 [29:20<03:07,  1.03s/it]

Training_loss 24.29940


 91%|█████████ | 1819/2000 [29:21<02:56,  1.03it/s]

Training_loss 24.30203


 91%|█████████ | 1820/2000 [29:22<02:48,  1.07it/s]

Training_loss 24.24969


 91%|█████████ | 1821/2000 [29:23<02:49,  1.06it/s]

Training_loss 24.46938


 91%|█████████ | 1822/2000 [29:24<02:47,  1.06it/s]

Training_loss 24.86787


 91%|█████████ | 1823/2000 [29:25<02:47,  1.06it/s]

Training_loss 24.76795


 91%|█████████ | 1824/2000 [29:25<02:42,  1.08it/s]

Training_loss 24.81942


 91%|█████████▏| 1825/2000 [29:26<02:40,  1.09it/s]

Training_loss 24.68955


 91%|█████████▏| 1826/2000 [29:27<02:36,  1.11it/s]

Training_loss 24.83743


 91%|█████████▏| 1827/2000 [29:28<02:30,  1.15it/s]

Training_loss 24.88299


 91%|█████████▏| 1828/2000 [29:29<02:26,  1.17it/s]

Training_loss 25.11459


 91%|█████████▏| 1829/2000 [29:30<02:36,  1.09it/s]

Training_loss 24.83563


 92%|█████████▏| 1830/2000 [29:31<02:42,  1.04it/s]

Training_loss 24.71121


 92%|█████████▏| 1831/2000 [29:32<02:40,  1.05it/s]

Training_loss 24.80160


 92%|█████████▏| 1832/2000 [29:33<02:41,  1.04it/s]

Training_loss 24.71796


 92%|█████████▏| 1833/2000 [29:34<03:04,  1.10s/it]

Training_loss 24.77620


 92%|█████████▏| 1834/2000 [29:35<02:52,  1.04s/it]

Training_loss 24.81552


 92%|█████████▏| 1835/2000 [29:36<02:52,  1.04s/it]

Training_loss 24.89345


 92%|█████████▏| 1836/2000 [29:37<02:43,  1.00it/s]

Training_loss 24.58572


 92%|█████████▏| 1837/2000 [29:38<02:31,  1.08it/s]

Training_loss 24.55673


 92%|█████████▏| 1838/2000 [29:39<02:34,  1.05it/s]

Training_loss 24.21093


 92%|█████████▏| 1839/2000 [29:40<02:28,  1.09it/s]

Training_loss 24.28678


 92%|█████████▏| 1840/2000 [29:41<02:30,  1.07it/s]

Training_loss 24.46208


 92%|█████████▏| 1841/2000 [29:42<02:27,  1.08it/s]

Training_loss 24.87967


 92%|█████████▏| 1842/2000 [29:43<02:39,  1.01s/it]

Training_loss 24.92931


 92%|█████████▏| 1843/2000 [29:44<02:36,  1.01it/s]

Training_loss 24.71378


 92%|█████████▏| 1844/2000 [29:45<02:33,  1.02it/s]

Training_loss 24.77537


 92%|█████████▏| 1845/2000 [29:46<02:25,  1.06it/s]

Training_loss 24.62421


 92%|█████████▏| 1846/2000 [29:47<02:22,  1.08it/s]

Training_loss 24.70727


 92%|█████████▏| 1847/2000 [29:47<02:24,  1.06it/s]

Training_loss 24.66917


 92%|█████████▏| 1848/2000 [29:49<02:33,  1.01s/it]

Training_loss 24.73163


 92%|█████████▏| 1849/2000 [29:50<02:33,  1.02s/it]

Training_loss 24.86796


 92%|█████████▎| 1850/2000 [29:51<02:34,  1.03s/it]

Training_loss 24.81805


 93%|█████████▎| 1851/2000 [29:52<02:38,  1.06s/it]

Training_loss 24.86252


 93%|█████████▎| 1852/2000 [29:53<02:35,  1.05s/it]

Training_loss 24.99122


 93%|█████████▎| 1853/2000 [29:54<02:36,  1.07s/it]

Training_loss 24.98697


 93%|█████████▎| 1854/2000 [29:55<02:36,  1.07s/it]

Training_loss 24.49071


 93%|█████████▎| 1855/2000 [29:56<02:28,  1.03s/it]

Training_loss 24.57232


 93%|█████████▎| 1856/2000 [29:57<02:19,  1.03it/s]

Training_loss 24.86393


 93%|█████████▎| 1857/2000 [29:58<02:15,  1.06it/s]

Training_loss 24.73950


 93%|█████████▎| 1858/2000 [29:58<02:04,  1.14it/s]

Training_loss 24.88493


 93%|█████████▎| 1859/2000 [29:59<02:00,  1.17it/s]

Training_loss 24.67883


 93%|█████████▎| 1860/2000 [30:00<01:56,  1.20it/s]

Training_loss 24.54057


 93%|█████████▎| 1861/2000 [30:01<01:59,  1.16it/s]

Training_loss 24.49785


 93%|█████████▎| 1862/2000 [30:02<02:01,  1.14it/s]

Training_loss 24.21335


 93%|█████████▎| 1863/2000 [30:03<01:56,  1.18it/s]

Training_loss 24.24677


 93%|█████████▎| 1864/2000 [30:03<01:53,  1.20it/s]

Training_loss 24.25853


 93%|█████████▎| 1865/2000 [30:04<01:55,  1.17it/s]

Training_loss 24.32915


 93%|█████████▎| 1866/2000 [30:05<01:53,  1.18it/s]

Training_loss 24.16648


 93%|█████████▎| 1867/2000 [30:06<02:02,  1.08it/s]

Training_loss 23.99270


 93%|█████████▎| 1868/2000 [30:07<02:03,  1.07it/s]

Training_loss 23.96752


 93%|█████████▎| 1869/2000 [30:08<01:57,  1.12it/s]

Training_loss 24.10465


 94%|█████████▎| 1870/2000 [30:09<01:59,  1.08it/s]

Training_loss 24.17191


 94%|█████████▎| 1871/2000 [30:10<02:00,  1.07it/s]

Training_loss 23.81097


 94%|█████████▎| 1872/2000 [30:11<02:05,  1.02it/s]

Training_loss 24.02977


 94%|█████████▎| 1873/2000 [30:12<01:59,  1.06it/s]

Training_loss 24.15892


 94%|█████████▎| 1874/2000 [30:13<01:56,  1.08it/s]

Training_loss 24.09943


 94%|█████████▍| 1875/2000 [30:14<01:53,  1.10it/s]

Training_loss 24.08477


 94%|█████████▍| 1876/2000 [30:15<01:51,  1.12it/s]

Training_loss 24.11478


 94%|█████████▍| 1877/2000 [30:16<01:55,  1.06it/s]

Training_loss 24.04487


 94%|█████████▍| 1878/2000 [30:16<01:52,  1.08it/s]

Training_loss 24.40485


 94%|█████████▍| 1879/2000 [30:18<02:03,  1.02s/it]

Training_loss 24.41258


 94%|█████████▍| 1880/2000 [30:19<01:59,  1.01it/s]

Training_loss 24.42452


 94%|█████████▍| 1881/2000 [30:20<02:00,  1.01s/it]

Training_loss 24.34853


 94%|█████████▍| 1882/2000 [30:21<01:54,  1.03it/s]

Training_loss 24.33527


 94%|█████████▍| 1883/2000 [30:22<01:59,  1.02s/it]

Training_loss 24.38683


 94%|█████████▍| 1884/2000 [30:23<01:57,  1.01s/it]

Training_loss 24.29924


 94%|█████████▍| 1885/2000 [30:24<01:57,  1.02s/it]

Training_loss 24.40234


 94%|█████████▍| 1886/2000 [30:25<01:47,  1.06it/s]

Training_loss 24.23359


 94%|█████████▍| 1887/2000 [30:26<01:47,  1.05it/s]

Training_loss 24.21708


 94%|█████████▍| 1888/2000 [30:27<01:52,  1.01s/it]

Training_loss 24.48575


 94%|█████████▍| 1889/2000 [30:28<01:50,  1.00it/s]

Training_loss 24.33279


 94%|█████████▍| 1890/2000 [30:29<01:49,  1.01it/s]

Training_loss 24.31653


 95%|█████████▍| 1891/2000 [30:30<01:51,  1.02s/it]

Training_loss 24.27554


 95%|█████████▍| 1892/2000 [30:31<01:49,  1.02s/it]

Training_loss 24.54487


 95%|█████████▍| 1893/2000 [30:32<02:01,  1.14s/it]

Training_loss 24.57361


 95%|█████████▍| 1894/2000 [30:33<02:02,  1.15s/it]

Training_loss 24.63837


 95%|█████████▍| 1895/2000 [30:34<01:59,  1.14s/it]

Training_loss 24.47878


 95%|█████████▍| 1896/2000 [30:35<01:51,  1.07s/it]

Training_loss 24.36931


 95%|█████████▍| 1897/2000 [30:37<01:53,  1.10s/it]

Training_loss 24.73588


 95%|█████████▍| 1898/2000 [30:38<01:54,  1.13s/it]

Training_loss 24.79160


 95%|█████████▍| 1899/2000 [30:38<01:43,  1.03s/it]

Training_loss 25.05350


 95%|█████████▌| 1900/2000 [30:40<01:45,  1.06s/it]

Training_loss 25.04864


 95%|█████████▌| 1901/2000 [30:40<01:37,  1.02it/s]

Training_loss 25.04853


 95%|█████████▌| 1902/2000 [30:41<01:34,  1.03it/s]

Training_loss 25.10270


 95%|█████████▌| 1903/2000 [30:42<01:33,  1.03it/s]

Training_loss 25.28889


 95%|█████████▌| 1904/2000 [30:43<01:27,  1.10it/s]

Training_loss 25.33660


 95%|█████████▌| 1905/2000 [30:44<01:34,  1.00it/s]

Training_loss 25.50694


 95%|█████████▌| 1906/2000 [30:45<01:27,  1.08it/s]

Training_loss 25.62859


 95%|█████████▌| 1907/2000 [30:46<01:26,  1.08it/s]

Training_loss 25.71192


 95%|█████████▌| 1908/2000 [30:47<01:29,  1.03it/s]

Training_loss 25.57254


 95%|█████████▌| 1909/2000 [30:48<01:25,  1.06it/s]

Training_loss 25.26900


 96%|█████████▌| 1910/2000 [30:49<01:21,  1.10it/s]

Training_loss 25.35803


 96%|█████████▌| 1911/2000 [30:50<01:26,  1.03it/s]

Training_loss 25.24755


 96%|█████████▌| 1912/2000 [30:51<01:22,  1.07it/s]

Training_loss 25.67901


 96%|█████████▌| 1913/2000 [30:52<01:26,  1.01it/s]

Training_loss 25.50479


 96%|█████████▌| 1914/2000 [30:53<01:21,  1.06it/s]

Training_loss 25.60957


 96%|█████████▌| 1915/2000 [30:54<01:17,  1.10it/s]

Training_loss 25.48498


 96%|█████████▌| 1916/2000 [30:54<01:15,  1.11it/s]

Training_loss 25.41755


 96%|█████████▌| 1917/2000 [30:55<01:11,  1.17it/s]

Training_loss 25.42553


 96%|█████████▌| 1918/2000 [30:56<01:20,  1.02it/s]

Training_loss 25.67602


 96%|█████████▌| 1919/2000 [30:57<01:17,  1.04it/s]

Training_loss 25.65440


 96%|█████████▌| 1920/2000 [30:58<01:19,  1.00it/s]

Training_loss 25.47676


 96%|█████████▌| 1921/2000 [30:59<01:20,  1.02s/it]

Training_loss 25.53966


 96%|█████████▌| 1922/2000 [31:00<01:18,  1.01s/it]

Training_loss 25.51790


 96%|█████████▌| 1923/2000 [31:02<01:19,  1.03s/it]

Training_loss 25.75646


 96%|█████████▌| 1924/2000 [31:03<01:21,  1.07s/it]

Training_loss 25.44754


 96%|█████████▋| 1925/2000 [31:04<01:19,  1.05s/it]

Training_loss 25.29127


 96%|█████████▋| 1926/2000 [31:05<01:18,  1.06s/it]

Training_loss 25.36865


 96%|█████████▋| 1927/2000 [31:06<01:18,  1.08s/it]

Training_loss 25.02962


 96%|█████████▋| 1928/2000 [31:07<01:17,  1.08s/it]

Training_loss 25.08010


 96%|█████████▋| 1929/2000 [31:08<01:15,  1.07s/it]

Training_loss 25.18929


 96%|█████████▋| 1930/2000 [31:09<01:13,  1.05s/it]

Training_loss 25.33387


 97%|█████████▋| 1931/2000 [31:10<01:11,  1.03s/it]

Training_loss 25.22101


 97%|█████████▋| 1932/2000 [31:11<01:11,  1.05s/it]

Training_loss 25.52621


 97%|█████████▋| 1933/2000 [31:12<01:09,  1.04s/it]

Training_loss 25.48537


 97%|█████████▋| 1934/2000 [31:13<01:09,  1.05s/it]

Training_loss 25.53670


 97%|█████████▋| 1935/2000 [31:14<01:11,  1.09s/it]

Training_loss 25.65055


 97%|█████████▋| 1936/2000 [31:16<01:11,  1.11s/it]

Training_loss 25.53951


 97%|█████████▋| 1937/2000 [31:17<01:14,  1.19s/it]

Training_loss 25.17836


 97%|█████████▋| 1938/2000 [31:19<01:21,  1.31s/it]

Training_loss 25.14517


 97%|█████████▋| 1939/2000 [31:20<01:14,  1.22s/it]

Training_loss 24.89660


 97%|█████████▋| 1940/2000 [31:21<01:10,  1.17s/it]

Training_loss 25.04401


 97%|█████████▋| 1941/2000 [31:22<01:06,  1.12s/it]

Training_loss 25.00243


 97%|█████████▋| 1942/2000 [31:23<01:04,  1.11s/it]

Training_loss 25.50920


 97%|█████████▋| 1943/2000 [31:24<00:59,  1.05s/it]

Training_loss 25.46178


 97%|█████████▋| 1944/2000 [31:24<00:55,  1.00it/s]

Training_loss 25.41318


 97%|█████████▋| 1945/2000 [31:25<00:52,  1.05it/s]

Training_loss 25.28602


 97%|█████████▋| 1946/2000 [31:26<00:52,  1.04it/s]

Training_loss 24.98265


 97%|█████████▋| 1947/2000 [31:27<00:51,  1.04it/s]

Training_loss 25.17499


 97%|█████████▋| 1948/2000 [31:28<00:50,  1.04it/s]

Training_loss 25.14425


 97%|█████████▋| 1949/2000 [31:29<00:50,  1.01it/s]

Training_loss 25.03842


 98%|█████████▊| 1950/2000 [31:30<00:50,  1.01s/it]

Training_loss 25.30457


 98%|█████████▊| 1951/2000 [31:31<00:49,  1.01s/it]

Training_loss 25.26330


 98%|█████████▊| 1952/2000 [31:32<00:49,  1.04s/it]

Training_loss 25.35500


 98%|█████████▊| 1953/2000 [31:33<00:47,  1.00s/it]

Training_loss 25.37767


 98%|█████████▊| 1954/2000 [31:35<00:47,  1.04s/it]

Training_loss 25.37251


 98%|█████████▊| 1955/2000 [31:36<00:46,  1.04s/it]

Training_loss 25.51289


 98%|█████████▊| 1956/2000 [31:36<00:44,  1.01s/it]

Training_loss 25.75298


 98%|█████████▊| 1957/2000 [31:37<00:41,  1.04it/s]

Training_loss 25.76064


 98%|█████████▊| 1958/2000 [31:38<00:40,  1.04it/s]

Training_loss 25.37130


 98%|█████████▊| 1959/2000 [31:39<00:39,  1.03it/s]

Training_loss 25.15778


 98%|█████████▊| 1960/2000 [31:40<00:39,  1.02it/s]

Training_loss 24.85188


 98%|█████████▊| 1961/2000 [31:41<00:36,  1.06it/s]

Training_loss 24.90570


 98%|█████████▊| 1962/2000 [31:42<00:36,  1.03it/s]

Training_loss 24.62095


 98%|█████████▊| 1963/2000 [31:43<00:35,  1.03it/s]

Training_loss 24.62831


 98%|█████████▊| 1964/2000 [31:44<00:36,  1.00s/it]

Training_loss 24.76745


 98%|█████████▊| 1965/2000 [31:45<00:34,  1.02it/s]

Training_loss 24.87940


 98%|█████████▊| 1966/2000 [31:46<00:33,  1.02it/s]

Training_loss 24.96024


 98%|█████████▊| 1967/2000 [31:47<00:31,  1.06it/s]

Training_loss 25.17127


 98%|█████████▊| 1968/2000 [31:48<00:30,  1.05it/s]

Training_loss 25.23291


 98%|█████████▊| 1969/2000 [31:49<00:29,  1.06it/s]

Training_loss 25.19760


 98%|█████████▊| 1970/2000 [31:50<00:28,  1.04it/s]

Training_loss 25.04568


 99%|█████████▊| 1971/2000 [31:51<00:28,  1.02it/s]

Training_loss 25.00708


 99%|█████████▊| 1972/2000 [31:52<00:27,  1.03it/s]

Training_loss 24.98215


 99%|█████████▊| 1973/2000 [31:53<00:26,  1.02it/s]

Training_loss 25.10824


 99%|█████████▊| 1974/2000 [31:54<00:26,  1.00s/it]

Training_loss 25.19043


 99%|█████████▉| 1975/2000 [31:55<00:25,  1.04s/it]

Training_loss 25.34143


 99%|█████████▉| 1976/2000 [31:56<00:24,  1.03s/it]

Training_loss 25.42105


 99%|█████████▉| 1977/2000 [31:57<00:23,  1.02s/it]

Training_loss 25.60820


 99%|█████████▉| 1978/2000 [31:58<00:22,  1.00s/it]

Training_loss 25.50955


 99%|█████████▉| 1979/2000 [31:59<00:20,  1.01it/s]

Training_loss 25.52899


 99%|█████████▉| 1980/2000 [32:00<00:19,  1.03it/s]

Training_loss 25.50491


 99%|█████████▉| 1981/2000 [32:01<00:17,  1.07it/s]

Training_loss 25.49413


 99%|█████████▉| 1982/2000 [32:02<00:16,  1.07it/s]

Training_loss 25.42043


 99%|█████████▉| 1983/2000 [32:03<00:15,  1.08it/s]

Training_loss 25.19231


 99%|█████████▉| 1984/2000 [32:04<00:14,  1.08it/s]

Training_loss 25.39630


 99%|█████████▉| 1985/2000 [32:04<00:13,  1.07it/s]

Training_loss 25.56562


 99%|█████████▉| 1986/2000 [32:05<00:13,  1.07it/s]

Training_loss 25.66005


 99%|█████████▉| 1987/2000 [32:06<00:12,  1.06it/s]

Training_loss 25.50233


 99%|█████████▉| 1988/2000 [32:07<00:11,  1.03it/s]

Training_loss 25.82936


 99%|█████████▉| 1989/2000 [32:08<00:10,  1.06it/s]

Training_loss 25.26563


100%|█████████▉| 1990/2000 [32:09<00:09,  1.02it/s]

Training_loss 25.32367


100%|█████████▉| 1991/2000 [32:10<00:08,  1.05it/s]

Training_loss 25.45192


100%|█████████▉| 1992/2000 [32:11<00:07,  1.07it/s]

Training_loss 25.10475


100%|█████████▉| 1993/2000 [32:12<00:06,  1.04it/s]

Training_loss 24.93448


100%|█████████▉| 1994/2000 [32:13<00:05,  1.05it/s]

Training_loss 24.98692


100%|█████████▉| 1995/2000 [32:14<00:04,  1.03it/s]

Training_loss 24.46770


100%|█████████▉| 1996/2000 [32:15<00:03,  1.02it/s]

Training_loss 24.46212


100%|█████████▉| 1997/2000 [32:16<00:03,  1.04s/it]

Training_loss 24.66294


100%|█████████▉| 1998/2000 [32:17<00:02,  1.01s/it]

Training_loss 24.65088


100%|█████████▉| 1999/2000 [32:18<00:01,  1.01s/it]

Training_loss 24.98538


100%|██████████| 2000/2000 [32:19<00:00,  1.03it/s]

Training_loss 25.19851





In [23]:
#Training_loss 5.33078 with no communication

In [24]:
#plot.plot(test_loss)
parameters_to_vector(models[19].parameters())

tensor([ 0.6498,  0.5638, -0.8521,  0.1079,  0.2085,  0.3570,  0.2455,  0.0381,
        -0.2764,  1.8908,  1.1659, -0.2855,  0.1584,  0.1103, -1.0718,  1.0543,
         0.9312,  0.5311, -0.0779,  2.0297, -0.3009,  0.7207,  0.5131, -0.3004,
         0.5323,  0.6547, -0.2666, -0.0088, -0.5496,  0.5120, -0.9995, -0.6847,
        -0.3101, -0.1495, -0.3391, -1.6521,  1.0532,  2.6998,  2.1916,  2.0958],
       grad_fn=<CatBackward0>)

In [25]:
for j in G.neighbors(0):
    print(j)

2
3
4
7
8


In [26]:
parameters_to_vector(models[0].parameters())

tensor([-0.1523,  0.0142, -0.7284, -0.3574,  0.1754,  0.0159, -0.6979, -0.7857,
        -0.3527, -1.6757, -1.3106,  0.5779, -0.4595,  0.7925, -0.6702,  0.0761,
         0.7484,  0.5895, -1.4977, -0.0773, -0.2616,  0.4856, -0.2582,  0.0864,
         0.1666,  0.7773, -0.5446, -0.6613, -2.6545,  0.2856, -0.1957,  0.3802,
        -0.3286,  0.2477,  0.4638, -0.4916,  1.2379,  2.4534,  1.6759,  2.6629],
       grad_fn=<CatBackward0>)

In [27]:
projection_list[0]

[0,
 0,
 tensor([[ 1.5768e+00, -1.1026e-03,  1.0094e-03,  1.4180e-04, -3.7897e-04,
           7.6718e-04,  1.4973e-03,  9.5909e-04,  7.7417e-04, -3.6380e-03,
           3.9821e-03,  1.2326e-03, -1.5220e-03, -2.0922e-03, -5.2375e-04,
          -2.9232e-03,  4.1438e-03,  9.2690e-04, -2.0173e-03,  3.4648e-03,
          -8.5586e-04,  7.5988e-05,  1.4284e-03,  1.8537e-03,  2.6821e-03,
          -2.1515e-04,  1.8757e-03,  7.8412e-03, -3.9963e-03,  1.2591e-03,
          -7.6591e-04, -7.5186e-04, -2.8237e-04,  2.0514e-03,  2.3287e-03,
          -1.6691e-03, -2.6062e-03,  5.3221e-03,  3.1952e-03,  6.5137e-03],
         [ 1.6101e-02,  1.5855e+00,  1.8589e-02,  1.7856e-02,  5.0975e-03,
          -2.2872e-03,  7.6694e-03,  2.3616e-02,  6.6366e-03,  5.6028e-02,
           1.3587e-02, -2.6815e-02,  6.6396e-03, -1.6604e-02,  2.2693e-02,
           8.7667e-03, -5.0490e-02, -1.7764e-02,  5.3661e-02, -5.1513e-03,
           2.0821e-02, -8.0512e-03, -2.0383e-03, -8.9978e-03, -1.7928e-02,
          -2.067

In [28]:
projected_weights[0]

[0,
 0,
 tensor([-0.1959, -1.2806, -0.6038, -0.4655,  0.6666, -0.3432, -0.3384, -0.0072,
         -1.3709]),
 tensor([-0.0094, -1.2979,  0.2881, -0.0279,  0.4518, -0.3627,  0.5162,  0.9549,
         -0.9390]),
 tensor([-0.1605, -1.2838, -0.4346, -0.3825,  0.6259, -0.3469, -0.1762,  0.1753,
         -1.2889]),
 0,
 0,
 tensor([ 0.0796, -1.3062,  0.7138,  0.1810,  0.3493, -0.3720,  0.9241,  1.4141,
         -0.7329]),
 tensor([-0.0649, -1.2927,  0.0229, -0.1580,  0.5157, -0.3569,  0.2621,  0.6688,
         -1.0674]),
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0]

In [29]:
test_loss = np.array(test_loss)
total_rel_error = np.array(total_rel_error)

In [30]:
print(test_loss)

[48.21755084 47.92515052 47.57178508 ... 24.65088367 24.9853778
 25.19851112]


In [31]:
np.save( 'training_loss_sheave_fml_alpha' + str(alpha).replace('.', '_') + "_eta_"+ str(eta).replace('.', '_') +  '_pout' + str(pout).replace('.', '_') + '+d0_' + str(d0), test_loss)
#np.save('relative_error_sheave_fml' + str(lamda).replace('.', '_'), total_rel_error)

In [32]:
'training_loss_sheave_fml' + str(lamda).replace('.', '_'), test_loss

('training_loss_sheave_fml0_001',
 array([48.21755084, 47.92515052, 47.57178508, ..., 24.65088367,
        24.9853778 , 25.19851112]))