In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Dataset
import copy
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import networkx as nx
from torch.nn.utils import parameters_to_vector, vector_to_parameters

In [2]:
# Graph implementation
def generate_graph(cluster_sizes=[100,100], pin=0.5, pout=0.01, seed=0):
    """Generate a random connected graph"""
    probs = np.array([[pin, pout],[pout, pin]])
    while True:
        g = nx.stochastic_block_model(cluster_sizes, probs)
        if nx.algorithms.components.is_connected(g):
            return g


cluster_sizes = [10, 10]
pin = 0.5
pout = 0.01
seed = 0
alpha = 1e-3
lamda = 1e-3
eta = 1e-2
mu = 1e-2
no_users = sum(cluster_sizes)
batch_size = 50
epochs = 1
it = 2000
G = generate_graph(cluster_sizes, pin, pout, seed)

#nx.draw(G, with_labels=True, node_size=100, alpha=1, linewidths=10)
#plt.show()

In [3]:
# Metropolis weights 
number_nodes = G.number_of_nodes()
weights = np.zeros([number_nodes, number_nodes])
for edge in G.edges():
  i, j = edge[0], edge[1]
  weights[i - 1][j - 1] = 1 / (1 + np.max([G.degree(i), G.degree(j)]))
  weights[j - 1][i - 1] = weights[i - 1][j - 1]

print(weights)

weights = weights + np.diag(1 - np.sum(weights, axis=0))

metropolis_weights = weights
print(metropolis_weights)


[[0.         0.         0.         0.2        0.         0.
  0.2        0.2        0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.2       ]
 [0.         0.         0.         0.         0.         0.25
  0.         0.         0.16666667 0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.        ]
 [0.         0.         0.         0.         0.14285714 0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.33333333]
 [0.2        0.         0.         0.         0.         0.
  0.2        0.         0.16666667 0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.        ]
 [0.         0.         0.14285714 0.         0.         0.14285714
  0.14285714 0.14285714 0.14285714 0.         0.         0.
  0.125      0.         0.         0.         0.  

In [4]:
def degrees(A):
    """Return the degrees of each node of a graph from its adjacency matrix"""
    return np.sum(A, axis=0).reshape(A.shape[0], 1)

def node_degree(n, G):
    cnt = 0
    for i in G.neighbors(n):
        cnt += 1
    return cnt

def get_neighbors(n, G):
    neighbors_list = []
    for i in G.neighbors(n):
        neighbors_list.append(int(i))
    return neighbors_list

In [5]:
degree_list = [G.degree(i) for i in range(no_users)]
print(degree_list)

for i in G.neighbors(0):
    print(i)

[2, 4, 2, 2, 3, 6, 3, 4, 2, 5, 7, 6, 5, 7, 6, 4, 6, 3, 4, 3]
1
3


In [6]:
# Dataset partitioning
def random_split(X, y, n, seed):
    """Equally split data between n agents"""
    rng = np.random.default_rng(seed)
    perm = rng.permutation(y.size)
    X_split = np.array_split(X[perm], n)  #np.stack to keep as a np array
    y_split = np.array_split(y[perm], n)
    return X_split, y_split





X_train = np.load('X_train.npy')
X_test = np.load('X_test.npy')
y_train = np.load('y_train.npy')
y_test = np.load('y_test.npy')


X, y = random_split(X_train, y_train, no_users, 1234)

In [7]:
datapoints = {}
count = 0
W1 = np.array([2.0, 2.0, 3.0, 3.0])
W2 = np.array([-2.0, 2.0, 3.0, -3.0])
W3 = 2 * W1
W4 = 2  * W2
W = [W1, W2]
m = 200
n = 4

scaler = [1.0, -1.0]

noise_sd = 0.001
for i, cluster_size in enumerate(cluster_sizes):
    for j in range(cluster_size):
        features = np.random.normal(loc=0.0, scale=1.0, size=(m, n))
        label = np.dot(features, W[i ]) + np.random.normal(0,noise_sd)
        data = X[count]
        data[:, 0:4] *= scaler[i]
        datapoints[count] = {
                'features': data,
                'degree': node_degree(count, G),
                'label': y[count],
                'neighbors': get_neighbors(count, G),
                'exact_weights': torch.from_numpy(W[i])
            }
        count += 1

In [8]:
class MyDataset(Dataset):
    def __init__(self, data, targets, transform=None):
        self.data = torch.FloatTensor(data)
        self.targets = torch.FloatTensor(targets).unsqueeze(-1)
        
    def __getitem__(self, index):
        x = self.data[index]
        y = self.targets[index]

        return x, y
    
    def __len__(self):
        return len(self.data)


In [9]:
class MLP_Net(nn.Module):
    def __init__(self, user_id):
        super(MLP_Net, self).__init__()
        self.fc1 = nn.Linear(9, 4, bias=False)
        self.fc2 = nn.Linear(4, 1, bias=False)
        #self.fc3 = nn.Linear(200, 10)
        self.user_id = user_id

    def forward(self, x):
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        output = self.fc2(x)
        #output = self.fc3(x)
        return output

In [10]:
from typing import Iterable, Optional

def grads_to_vector(parameters: Iterable[torch.Tensor]) -> torch.Tensor:
    r"""Convert parameters to one vector

    Args:
        parameters (Iterable[Tensor]): an iterator of Tensors that are the
            parameters of a model.

    Returns:
        The parameters represented by a single vector
    """
    # Flag for the device where the parameter is located
    param_device = None

    vec = []
    for param in parameters:
        # Ensure the parameters are located in the same device
        param_device = param.grad

        vec.append(param_device.view(-1))
    return torch.cat(vec)

In [11]:
model = MLP_Net(user_id=0)

lr = 0.01

dataloader = DataLoader(MyDataset(datapoints[19]["features"], datapoints[19]["label"]), batch_size=50, shuffle=False)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
for i in range(100):
    for (x, y) in dataloader:
        criterion = nn.MSELoss()
        optimizer.zero_grad()
        yhat = model(x)
        print(y.size())
        print(yhat.size())
        loss = criterion(yhat, y)
        
        loss.backward()
        print(i, loss, grads_to_vector(model.parameters()), parameters_to_vector(model.parameters()))
        #optimizer.step()
        #new_model = parameters_to_vector(model.parameters()) - lr * grads_to_vector(model.parameters())
        #vector_to_parameters(parameters=model.parameters(), vec=new_model)
        #if i % 50 ==0:
            #lr *= 0.9
            

#parameters_to_vector(model.parameters())

torch.Size([50, 1])
torch.Size([50, 1])
0 tensor(47.0235, grad_fn=<MseLossBackward0>) tensor([ 0.0152,  0.0092, -0.0655,  0.2267, -0.3228, -0.2686, -0.3301,  0.1126,
        -0.0394, -0.4880,  0.1115,  0.8335, -0.0887, -0.2546, -0.2907, -0.3002,
         0.4460, -2.2253, -0.4600,  0.4040,  0.0984,  0.1270, -0.0244,  0.0132,
        -0.0421, -0.1371, -0.4225, -0.1874,  0.3231, -0.8927,  0.4509, -0.4262,
        -0.1263, -0.3973,  0.0710, -0.7483, -1.5357, -1.7467, -3.6251, -1.2475]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
0 tensor(52.0178, grad_fn=<MseLossBackwar

        -4.0797e-01, -1.5751e+00, -1.7819e+00, -3.2785e+00, -1.5227e+00]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
0 tensor(52.6346, grad_fn=<MseLossBackward0>) tensor([ 0.0324,  0.0146, -0.0998,  0.3893, -0.3332, -0.2901, -0.3446, -0.0793,
         0.1571, -0.4194,  0.2125,  1.2375, -0.4658,  0.5467,  0.5942,  0.5274,
         0.1081, -1.8170, -0.4652,  0.4214,  0.0997, -0.0467,  0.1348,  0.2015,
         0.1201, -0.2051, -0.3993, -0.2504,  0.4929, -0.8825, -0.1061,  0.2059,
         0.4398,  0.2954, -0.0057, -0.1840, -1.6867, -1.4499, -4.0686, -1.3261]) tensor(

2 tensor(43.1426, grad_fn=<MseLossBackward0>) tensor([-0.0741,  0.0478, -0.0332,  0.2938, -0.4404, -0.3010, -0.4456,  0.0672,
         0.1188, -0.7313,  0.4216,  0.9954, -0.4385,  0.3495,  0.4329,  0.3372,
         0.2655, -2.1865, -0.4205,  0.3603,  0.3063, -0.0889,  0.0716,  0.1156,
         0.0881,  0.0463, -0.2909,  0.1254,  0.0229, -0.6192, -0.1412, -0.0496,
         0.2373,  0.0473,  0.4117, -0.6123, -2.0320, -1.6228, -3.5221, -1.3218]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
2 tensor(71.4659, grad_fn=<MseLossBackward0>) tensor([-0.1852,  0.1109,  0.2426, 

torch.Size([50, 1])
torch.Size([50, 1])
2 tensor(44.2423, grad_fn=<MseLossBackward0>) tensor([-1.5409e-01,  1.6859e-01, -5.2306e-04,  4.0915e-01, -4.0601e-01,
        -2.7275e-01, -4.2042e-01,  6.9891e-02,  1.7734e-02, -2.9643e-01,
         3.0144e-01,  4.2596e-01, -2.9568e-01,  1.3551e-01,  7.6462e-02,
         8.7127e-02,  5.5483e-01, -8.9083e-01, -4.6558e-01,  4.0750e-01,
        -2.2863e-02, -3.6804e-02,  6.0646e-02,  1.1635e-01,  3.0602e-02,
        -1.2093e-01, -9.2405e-02, -4.1741e-01,  5.5657e-01, -9.2449e-01,
        -3.9386e-01,  3.9742e-01,  6.4582e-01,  3.9613e-01,  3.0851e-01,
        -3.9311e-01, -2.3022e+00, -1.0920e+00, -2.6709e+00, -1.7015e+00]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -

5 tensor(47.0235, grad_fn=<MseLossBackward0>) tensor([ 0.0152,  0.0092, -0.0655,  0.2267, -0.3228, -0.2686, -0.3301,  0.1126,
        -0.0394, -0.4880,  0.1115,  0.8335, -0.0887, -0.2546, -0.2907, -0.3002,
         0.4460, -2.2253, -0.4600,  0.4040,  0.0984,  0.1270, -0.0244,  0.0132,
        -0.0421, -0.1371, -0.4225, -0.1874,  0.3231, -0.8927,  0.4509, -0.4262,
        -0.1263, -0.3973,  0.0710, -0.7483, -1.5357, -1.7467, -3.6251, -1.2475]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
5 tensor(52.0178, grad_fn=<MseLossBackward0>) tensor([-0.0244,  0.0097,  0.0729, 

5 tensor(55.9117, grad_fn=<MseLossBackward0>) tensor([-0.1367,  0.0723,  0.2448,  0.3873, -0.4015, -0.3400, -0.4677, -0.0818,
         0.1542, -0.4142,  0.2003,  1.6130, -0.6793,  0.7654,  0.6155,  0.6872,
         0.4222, -1.2224, -0.5165,  0.4050,  0.3507,  0.0780, -0.0248,  0.0595,
        -0.0557, -0.1758, -0.3056, -0.2032,  0.2365, -0.3457, -0.0925, -0.0422,
         0.0779, -0.1101,  0.2811, -0.6639, -1.9187, -1.4821, -3.6708, -0.8566]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
5 tensor(53.2607, grad_fn=<MseLossBackward0>) tensor([ 0.0191, -0.0238, -0.0738, 

7 tensor(37.2503, grad_fn=<MseLossBackward0>) tensor([ 0.1090, -0.0330, -0.2826,  0.5981, -0.5834, -0.3019, -0.6095, -0.1607,
         0.3040, -0.5040,  0.1017,  0.5342, -1.0234,  1.1540, -0.5992,  1.0991,
         0.7360, -2.5169, -0.2880,  0.2658, -0.1832, -0.0107,  0.1052, -0.2462,
         0.0935,  0.0531, -0.5584,  0.0868, -0.0958, -0.4390, -0.0056,  0.1783,
         0.3861,  0.1182,  0.2300, -2.1106, -2.8580, -3.5361, -3.3137, -1.5430]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
8 tensor(47.0235, grad_fn=<MseLossBackward0>) tensor([ 0.0152,  0.0092, -0.0655, 

         0.4398,  0.2954, -0.0057, -0.1840, -1.6867, -1.4499, -4.0686, -1.3261]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
8 tensor(55.9117, grad_fn=<MseLossBackward0>) tensor([-0.1367,  0.0723,  0.2448,  0.3873, -0.4015, -0.3400, -0.4677, -0.0818,
         0.1542, -0.4142,  0.2003,  1.6130, -0.6793,  0.7654,  0.6155,  0.6872,
         0.4222, -1.2224, -0.5165,  0.4050,  0.3507,  0.0780, -0.0248,  0.0595,
        -0.0557, -0.1758, -0.3056, -0.2032,  0.2365, -0.3457, -0.0925, -0.0422,
         0.0779, -0.1101,  0.2811, -0.6639, -1.9187, -1.4821, -3.6708, -0.8566]) 

10 tensor(37.2503, grad_fn=<MseLossBackward0>) tensor([ 0.1090, -0.0330, -0.2826,  0.5981, -0.5834, -0.3019, -0.6095, -0.1607,
         0.3040, -0.5040,  0.1017,  0.5342, -1.0234,  1.1540, -0.5992,  1.0991,
         0.7360, -2.5169, -0.2880,  0.2658, -0.1832, -0.0107,  0.1052, -0.2462,
         0.0935,  0.0531, -0.5584,  0.0868, -0.0958, -0.4390, -0.0056,  0.1783,
         0.3861,  0.1182,  0.2300, -2.1106, -2.8580, -3.5361, -3.3137, -1.5430]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
11 tensor(47.0235, grad_fn=<MseLossBackward0>) tensor([ 0.0152,  0.0092, -0.0655

torch.Size([50, 1])
torch.Size([50, 1])
11 tensor(52.6346, grad_fn=<MseLossBackward0>) tensor([ 0.0324,  0.0146, -0.0998,  0.3893, -0.3332, -0.2901, -0.3446, -0.0793,
         0.1571, -0.4194,  0.2125,  1.2375, -0.4658,  0.5467,  0.5942,  0.5274,
         0.1081, -1.8170, -0.4652,  0.4214,  0.0997, -0.0467,  0.1348,  0.2015,
         0.1201, -0.2051, -0.3993, -0.2504,  0.4929, -0.8825, -0.1061,  0.2059,
         0.4398,  0.2954, -0.0057, -0.1840, -1.6867, -1.4499, -4.0686, -1.3261]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
11 tensor(55.9117, grad_fn=<MseLossBackw

13 tensor(53.2607, grad_fn=<MseLossBackward0>) tensor([ 0.0191, -0.0238, -0.0738,  0.4307, -0.4734, -0.4304, -0.4914,  0.0685,
        -0.1625, -0.0727, -0.1766,  0.6218,  0.2194, -0.5895, -0.1209, -0.5960,
         0.4323, -2.1835, -0.3746,  0.2780,  0.1965, -0.0471,  0.1704,  0.2219,
         0.1486, -0.3160, -0.5000, -0.0172,  0.1347, -0.6282, -0.3691,  0.1746,
         0.5227,  0.1197,  0.0855, -1.0266, -1.9636, -1.3622, -3.9460, -1.2749]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
13 tensor(45.2610, grad_fn=<MseLossBackward0>) tensor([ 0.1354, -0.0807, -0.0078

14 tensor(48.4883, grad_fn=<MseLossBackward0>) tensor([-0.0187,  0.0578, -0.0621,  0.3524, -0.4307, -0.2647, -0.4134,  0.1469,
         0.0700, -0.0629, -0.3660,  0.6588, -0.6120,  0.3034,  0.6519,  0.5415,
        -0.0112, -2.9616, -0.4833,  0.3511,  0.2303, -0.1060,  0.1885,  0.2287,
         0.1827, -0.2201, -0.4090,  0.1034,  0.1254, -0.7286,  0.2786, -0.4356,
         0.1601, -0.2720,  0.4215, -0.8392, -2.4366, -1.8569, -4.2604, -1.5827]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
14 tensor(46.5566, grad_fn=<MseLossBackward0>) tensor([-5.0952e-02, -2.4350e-02,

16 tensor(53.2607, grad_fn=<MseLossBackward0>) tensor([ 0.0191, -0.0238, -0.0738,  0.4307, -0.4734, -0.4304, -0.4914,  0.0685,
        -0.1625, -0.0727, -0.1766,  0.6218,  0.2194, -0.5895, -0.1209, -0.5960,
         0.4323, -2.1835, -0.3746,  0.2780,  0.1965, -0.0471,  0.1704,  0.2219,
         0.1486, -0.3160, -0.5000, -0.0172,  0.1347, -0.6282, -0.3691,  0.1746,
         0.5227,  0.1197,  0.0855, -1.0266, -1.9636, -1.3622, -3.9460, -1.2749]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
16 tensor(45.2610, grad_fn=<MseLossBackward0>) tensor([ 0.1354, -0.0807, -0.0078

17 tensor(57.7881, grad_fn=<MseLossBackward0>) tensor([ 0.1180, -0.1608,  0.0573,  0.4532, -0.6506, -0.4613, -0.5968,  0.0656,
         0.0952, -0.1512, -0.2137,  1.0715,  0.0685, -0.6441, -1.3928, -0.7339,
         0.7034, -1.0632, -0.3117,  0.2163, -0.0877, -0.0223,  0.0224,  0.0337,
         0.0300, -0.1995, -0.2968,  0.1634, -0.1488, -0.8258,  0.1696, -0.4593,
         0.2278, -0.3143,  0.2370, -0.1074, -2.6811, -1.7609, -2.0528, -1.0219]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
17 tensor(48.4883, grad_fn=<MseLossBackward0>) tensor([-0.0187,  0.0578, -0.0621

torch.Size([50, 1])
torch.Size([50, 1])
19 tensor(45.2610, grad_fn=<MseLossBackward0>) tensor([ 0.1354, -0.0807, -0.0078,  0.3358, -0.2964, -0.2533, -0.3703,  0.1283,
         0.1407, -0.0480,  0.0632,  1.3674, -0.7643,  0.3101, -0.0305,  0.2660,
         1.2382, -0.1131, -0.3213,  0.3047, -0.0589, -0.0235,  0.0912,  0.0793,
         0.0380,  0.0618, -0.2147, -0.2281,  0.3903, -1.4909,  0.3990, -0.1413,
         0.0297, -0.3017, -0.0355, -0.3138, -2.1324, -1.5592, -2.4731, -1.6009]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
19 tensor(44.2423, grad_fn=<MseLossBackw

20 tensor(46.5566, grad_fn=<MseLossBackward0>) tensor([-5.0952e-02, -2.4350e-02,  2.0477e-01,  1.7346e-01, -3.1018e-01,
        -2.5375e-01, -3.4807e-01,  1.8455e-01, -2.6644e-02, -3.8329e-01,
        -5.0870e-02,  9.7430e-01, -3.3182e-01, -2.7975e-01, -1.3494e-01,
        -1.9798e-01,  8.3880e-01, -1.9176e+00, -3.5443e-01,  2.6921e-01,
         2.0183e-01, -3.8693e-02,  3.5878e-02,  1.2302e-01,  1.3757e-03,
        -6.6133e-02, -5.2172e-01,  2.6807e-01, -1.5027e-01, -9.0552e-01,
        -3.2231e-01,  1.7175e-01,  5.2815e-01,  2.4943e-01,  3.7001e-01,
        -4.0797e-01, -1.5751e+00, -1.7819e+00, -3.2785e+00, -1.5227e+00]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0

23 tensor(47.0235, grad_fn=<MseLossBackward0>) tensor([ 0.0152,  0.0092, -0.0655,  0.2267, -0.3228, -0.2686, -0.3301,  0.1126,
        -0.0394, -0.4880,  0.1115,  0.8335, -0.0887, -0.2546, -0.2907, -0.3002,
         0.4460, -2.2253, -0.4600,  0.4040,  0.0984,  0.1270, -0.0244,  0.0132,
        -0.0421, -0.1371, -0.4225, -0.1874,  0.3231, -0.8927,  0.4509, -0.4262,
        -0.1263, -0.3973,  0.0710, -0.7483, -1.5357, -1.7467, -3.6251, -1.2475]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
23 tensor(52.0178, grad_fn=<MseLossBackward0>) tensor([-0.0244,  0.0097,  0.0729

23 tensor(52.6346, grad_fn=<MseLossBackward0>) tensor([ 0.0324,  0.0146, -0.0998,  0.3893, -0.3332, -0.2901, -0.3446, -0.0793,
         0.1571, -0.4194,  0.2125,  1.2375, -0.4658,  0.5467,  0.5942,  0.5274,
         0.1081, -1.8170, -0.4652,  0.4214,  0.0997, -0.0467,  0.1348,  0.2015,
         0.1201, -0.2051, -0.3993, -0.2504,  0.4929, -0.8825, -0.1061,  0.2059,
         0.4398,  0.2954, -0.0057, -0.1840, -1.6867, -1.4499, -4.0686, -1.3261]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
23 tensor(55.9117, grad_fn=<MseLossBackward0>) tensor([-0.1367,  0.0723,  0.2448

25 tensor(55.9117, grad_fn=<MseLossBackward0>) tensor([-0.1367,  0.0723,  0.2448,  0.3873, -0.4015, -0.3400, -0.4677, -0.0818,
         0.1542, -0.4142,  0.2003,  1.6130, -0.6793,  0.7654,  0.6155,  0.6872,
         0.4222, -1.2224, -0.5165,  0.4050,  0.3507,  0.0780, -0.0248,  0.0595,
        -0.0557, -0.1758, -0.3056, -0.2032,  0.2365, -0.3457, -0.0925, -0.0422,
         0.0779, -0.1101,  0.2811, -0.6639, -1.9187, -1.4821, -3.6708, -0.8566]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
25 tensor(53.2607, grad_fn=<MseLossBackward0>) tensor([ 0.0191, -0.0238, -0.0738

26 tensor(57.7881, grad_fn=<MseLossBackward0>) tensor([ 0.1180, -0.1608,  0.0573,  0.4532, -0.6506, -0.4613, -0.5968,  0.0656,
         0.0952, -0.1512, -0.2137,  1.0715,  0.0685, -0.6441, -1.3928, -0.7339,
         0.7034, -1.0632, -0.3117,  0.2163, -0.0877, -0.0223,  0.0224,  0.0337,
         0.0300, -0.1995, -0.2968,  0.1634, -0.1488, -0.8258,  0.1696, -0.4593,
         0.2278, -0.3143,  0.2370, -0.1074, -2.6811, -1.7609, -2.0528, -1.0219]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
26 tensor(48.4883, grad_fn=<MseLossBackward0>) tensor([-0.0187,  0.0578, -0.0621

         0.0779, -0.1101,  0.2811, -0.6639, -1.9187, -1.4821, -3.6708, -0.8566]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
28 tensor(53.2607, grad_fn=<MseLossBackward0>) tensor([ 0.0191, -0.0238, -0.0738,  0.4307, -0.4734, -0.4304, -0.4914,  0.0685,
        -0.1625, -0.0727, -0.1766,  0.6218,  0.2194, -0.5895, -0.1209, -0.5960,
         0.4323, -2.1835, -0.3746,  0.2780,  0.1965, -0.0471,  0.1704,  0.2219,
         0.1486, -0.3160, -0.5000, -0.0172,  0.1347, -0.6282, -0.3691,  0.1746,
         0.5227,  0.1197,  0.0855, -1.0266, -1.9636, -1.3622, -3.9460, -1.2749])

29 tensor(57.7881, grad_fn=<MseLossBackward0>) tensor([ 0.1180, -0.1608,  0.0573,  0.4532, -0.6506, -0.4613, -0.5968,  0.0656,
         0.0952, -0.1512, -0.2137,  1.0715,  0.0685, -0.6441, -1.3928, -0.7339,
         0.7034, -1.0632, -0.3117,  0.2163, -0.0877, -0.0223,  0.0224,  0.0337,
         0.0300, -0.1995, -0.2968,  0.1634, -0.1488, -0.8258,  0.1696, -0.4593,
         0.2278, -0.3143,  0.2370, -0.1074, -2.6811, -1.7609, -2.0528, -1.0219]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
29 tensor(48.4883, grad_fn=<MseLossBackward0>) tensor([-0.0187,  0.0578, -0.0621

torch.Size([50, 1])
torch.Size([50, 1])
31 tensor(45.2610, grad_fn=<MseLossBackward0>) tensor([ 0.1354, -0.0807, -0.0078,  0.3358, -0.2964, -0.2533, -0.3703,  0.1283,
         0.1407, -0.0480,  0.0632,  1.3674, -0.7643,  0.3101, -0.0305,  0.2660,
         1.2382, -0.1131, -0.3213,  0.3047, -0.0589, -0.0235,  0.0912,  0.0793,
         0.0380,  0.0618, -0.2147, -0.2281,  0.3903, -1.4909,  0.3990, -0.1413,
         0.0297, -0.3017, -0.0355, -0.3138, -2.1324, -1.5592, -2.4731, -1.6009]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
31 tensor(44.2423, grad_fn=<MseLossBackw

32 tensor(46.5566, grad_fn=<MseLossBackward0>) tensor([-5.0952e-02, -2.4350e-02,  2.0477e-01,  1.7346e-01, -3.1018e-01,
        -2.5375e-01, -3.4807e-01,  1.8455e-01, -2.6644e-02, -3.8329e-01,
        -5.0870e-02,  9.7430e-01, -3.3182e-01, -2.7975e-01, -1.3494e-01,
        -1.9798e-01,  8.3880e-01, -1.9176e+00, -3.5443e-01,  2.6921e-01,
         2.0183e-01, -3.8693e-02,  3.5878e-02,  1.2302e-01,  1.3757e-03,
        -6.6133e-02, -5.2172e-01,  2.6807e-01, -1.5027e-01, -9.0552e-01,
        -3.2231e-01,  1.7175e-01,  5.2815e-01,  2.4943e-01,  3.7001e-01,
        -4.0797e-01, -1.5751e+00, -1.7819e+00, -3.2785e+00, -1.5227e+00]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0

34 tensor(37.2503, grad_fn=<MseLossBackward0>) tensor([ 0.1090, -0.0330, -0.2826,  0.5981, -0.5834, -0.3019, -0.6095, -0.1607,
         0.3040, -0.5040,  0.1017,  0.5342, -1.0234,  1.1540, -0.5992,  1.0991,
         0.7360, -2.5169, -0.2880,  0.2658, -0.1832, -0.0107,  0.1052, -0.2462,
         0.0935,  0.0531, -0.5584,  0.0868, -0.0958, -0.4390, -0.0056,  0.1783,
         0.3861,  0.1182,  0.2300, -2.1106, -2.8580, -3.5361, -3.3137, -1.5430]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
35 tensor(47.0235, grad_fn=<MseLossBackward0>) tensor([ 0.0152,  0.0092, -0.0655

35 tensor(55.9117, grad_fn=<MseLossBackward0>) tensor([-0.1367,  0.0723,  0.2448,  0.3873, -0.4015, -0.3400, -0.4677, -0.0818,
         0.1542, -0.4142,  0.2003,  1.6130, -0.6793,  0.7654,  0.6155,  0.6872,
         0.4222, -1.2224, -0.5165,  0.4050,  0.3507,  0.0780, -0.0248,  0.0595,
        -0.0557, -0.1758, -0.3056, -0.2032,  0.2365, -0.3457, -0.0925, -0.0422,
         0.0779, -0.1101,  0.2811, -0.6639, -1.9187, -1.4821, -3.6708, -0.8566]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
35 tensor(53.2607, grad_fn=<MseLossBackward0>) tensor([ 0.0191, -0.0238, -0.0738

torch.Size([50, 1])
torch.Size([50, 1])
38 tensor(51.4716, grad_fn=<MseLossBackward0>) tensor([-2.3031e-01,  1.8165e-01, -1.6926e-01,  4.4679e-01, -4.7382e-01,
        -2.3629e-01, -4.2067e-01,  2.7646e-03,  1.0064e-02, -2.3193e-01,
         2.5040e-01,  9.8199e-01, -1.8288e-01,  3.1149e-02, -2.7661e-01,
         2.0761e-01,  4.9711e-01, -1.0008e+00, -5.7320e-01,  4.9896e-01,
        -1.1614e-01,  1.4121e-01, -4.8902e-02, -4.1563e-02, -3.6008e-02,
        -1.9402e-01, -1.7994e-01, -8.9036e-01,  9.2474e-01, -9.7483e-01,
         1.8989e-01, -2.3585e-01,  3.9247e-01, -2.3175e-02,  4.1963e-04,
        -3.1328e-01, -2.1220e+00, -1.6761e+00, -3.2020e+00, -1.4035e+00]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        

         0.5227,  0.1197,  0.0855, -1.0266, -1.9636, -1.3622, -3.9460, -1.2749]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
38 tensor(45.2610, grad_fn=<MseLossBackward0>) tensor([ 0.1354, -0.0807, -0.0078,  0.3358, -0.2964, -0.2533, -0.3703,  0.1283,
         0.1407, -0.0480,  0.0632,  1.3674, -0.7643,  0.3101, -0.0305,  0.2660,
         1.2382, -0.1131, -0.3213,  0.3047, -0.0589, -0.0235,  0.0912,  0.0793,
         0.0380,  0.0618, -0.2147, -0.2281,  0.3903, -1.4909,  0.3990, -0.1413,
         0.0297, -0.3017, -0.0355, -0.3138, -2.1324, -1.5592, -2.4731, -1.6009])

41 tensor(51.4716, grad_fn=<MseLossBackward0>) tensor([-2.3031e-01,  1.8165e-01, -1.6926e-01,  4.4679e-01, -4.7382e-01,
        -2.3629e-01, -4.2067e-01,  2.7646e-03,  1.0064e-02, -2.3193e-01,
         2.5040e-01,  9.8199e-01, -1.8288e-01,  3.1149e-02, -2.7661e-01,
         2.0761e-01,  4.9711e-01, -1.0008e+00, -5.7320e-01,  4.9896e-01,
        -1.1614e-01,  1.4121e-01, -4.8902e-02, -4.1563e-02, -3.6008e-02,
        -1.9402e-01, -1.7994e-01, -8.9036e-01,  9.2474e-01, -9.7483e-01,
         1.8989e-01, -2.3585e-01,  3.9247e-01, -2.3175e-02,  4.1963e-04,
        -3.1328e-01, -2.1220e+00, -1.6761e+00, -3.2020e+00, -1.4035e+00]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0

         0.0297, -0.3017, -0.0355, -0.3138, -2.1324, -1.5592, -2.4731, -1.6009]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
41 tensor(44.2423, grad_fn=<MseLossBackward0>) tensor([-1.5409e-01,  1.6859e-01, -5.2306e-04,  4.0915e-01, -4.0601e-01,
        -2.7275e-01, -4.2042e-01,  6.9891e-02,  1.7734e-02, -2.9643e-01,
         3.0144e-01,  4.2596e-01, -2.9568e-01,  1.3551e-01,  7.6462e-02,
         8.7127e-02,  5.5483e-01, -8.9083e-01, -4.6558e-01,  4.0750e-01,
        -2.2863e-02, -3.6804e-02,  6.0646e-02,  1.1635e-01,  3.0602e-02,
        -1.2093e-01, -9.2405e-02, -

44 tensor(51.4716, grad_fn=<MseLossBackward0>) tensor([-2.3031e-01,  1.8165e-01, -1.6926e-01,  4.4679e-01, -4.7382e-01,
        -2.3629e-01, -4.2067e-01,  2.7646e-03,  1.0064e-02, -2.3193e-01,
         2.5040e-01,  9.8199e-01, -1.8288e-01,  3.1149e-02, -2.7661e-01,
         2.0761e-01,  4.9711e-01, -1.0008e+00, -5.7320e-01,  4.9896e-01,
        -1.1614e-01,  1.4121e-01, -4.8902e-02, -4.1563e-02, -3.6008e-02,
        -1.9402e-01, -1.7994e-01, -8.9036e-01,  9.2474e-01, -9.7483e-01,
         1.8989e-01, -2.3585e-01,  3.9247e-01, -2.3175e-02,  4.1963e-04,
        -3.1328e-01, -2.1220e+00, -1.6761e+00, -3.2020e+00, -1.4035e+00]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0

44 tensor(44.2423, grad_fn=<MseLossBackward0>) tensor([-1.5409e-01,  1.6859e-01, -5.2306e-04,  4.0915e-01, -4.0601e-01,
        -2.7275e-01, -4.2042e-01,  6.9891e-02,  1.7734e-02, -2.9643e-01,
         3.0144e-01,  4.2596e-01, -2.9568e-01,  1.3551e-01,  7.6462e-02,
         8.7127e-02,  5.5483e-01, -8.9083e-01, -4.6558e-01,  4.0750e-01,
        -2.2863e-02, -3.6804e-02,  6.0646e-02,  1.1635e-01,  3.0602e-02,
        -1.2093e-01, -9.2405e-02, -4.1741e-01,  5.5657e-01, -9.2449e-01,
        -3.9386e-01,  3.9742e-01,  6.4582e-01,  3.9613e-01,  3.0851e-01,
        -3.9311e-01, -2.3022e+00, -1.0920e+00, -2.6709e+00, -1.7015e+00]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0

        -2.9437e-01, -2.6589e+00, -1.2610e+00, -3.4484e+00, -1.5088e+00]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
47 tensor(43.1426, grad_fn=<MseLossBackward0>) tensor([-0.0741,  0.0478, -0.0332,  0.2938, -0.4404, -0.3010, -0.4456,  0.0672,
         0.1188, -0.7313,  0.4216,  0.9954, -0.4385,  0.3495,  0.4329,  0.3372,
         0.2655, -2.1865, -0.4205,  0.3603,  0.3063, -0.0889,  0.0716,  0.1156,
         0.0881,  0.0463, -0.2909,  0.1254,  0.0229, -0.6192, -0.1412, -0.0496,
         0.2373,  0.0473,  0.4117, -0.6123, -2.0320, -1.6228, -3.5221, -1.3218]) tensor

48 tensor(47.0235, grad_fn=<MseLossBackward0>) tensor([ 0.0152,  0.0092, -0.0655,  0.2267, -0.3228, -0.2686, -0.3301,  0.1126,
        -0.0394, -0.4880,  0.1115,  0.8335, -0.0887, -0.2546, -0.2907, -0.3002,
         0.4460, -2.2253, -0.4600,  0.4040,  0.0984,  0.1270, -0.0244,  0.0132,
        -0.0421, -0.1371, -0.4225, -0.1874,  0.3231, -0.8927,  0.4509, -0.4262,
        -0.1263, -0.3973,  0.0710, -0.7483, -1.5357, -1.7467, -3.6251, -1.2475]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
48 tensor(52.0178, grad_fn=<MseLossBackward0>) tensor([-0.0244,  0.0097,  0.0729

50 tensor(48.4883, grad_fn=<MseLossBackward0>) tensor([-0.0187,  0.0578, -0.0621,  0.3524, -0.4307, -0.2647, -0.4134,  0.1469,
         0.0700, -0.0629, -0.3660,  0.6588, -0.6120,  0.3034,  0.6519,  0.5415,
        -0.0112, -2.9616, -0.4833,  0.3511,  0.2303, -0.1060,  0.1885,  0.2287,
         0.1827, -0.2201, -0.4090,  0.1034,  0.1254, -0.7286,  0.2786, -0.4356,
         0.1601, -0.2720,  0.4215, -0.8392, -2.4366, -1.8569, -4.2604, -1.5827]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
50 tensor(46.5566, grad_fn=<MseLossBackward0>) tensor([-5.0952e-02, -2.4350e-02,

torch.Size([50, 1])
torch.Size([50, 1])
51 tensor(45.9592, grad_fn=<MseLossBackward0>) tensor([-1.9545e-01,  1.7656e-01,  1.2346e-01,  5.0683e-01, -5.9847e-01,
        -3.7502e-01, -6.2273e-01,  1.8041e-03, -1.5291e-01, -2.2742e-01,
         9.9086e-02,  1.0423e+00, -7.0818e-01,  1.8997e-01,  4.0963e-01,
         1.5935e-01,  8.1212e-01, -1.0495e+00, -5.2108e-01,  4.4709e-01,
         1.3682e-01,  1.8651e-01, -1.7748e-01, -4.7157e-02, -1.9580e-01,
        -8.0014e-02, -4.8935e-01, -4.3408e-01,  6.0513e-01, -1.0842e+00,
         9.5608e-02, -4.3940e-02,  1.0034e-01,  1.5085e-02,  1.5917e-01,
        -2.9437e-01, -2.6589e+00, -1.2610e+00, -3.4484e+00, -1.5088e+00]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        

53 tensor(48.4883, grad_fn=<MseLossBackward0>) tensor([-0.0187,  0.0578, -0.0621,  0.3524, -0.4307, -0.2647, -0.4134,  0.1469,
         0.0700, -0.0629, -0.3660,  0.6588, -0.6120,  0.3034,  0.6519,  0.5415,
        -0.0112, -2.9616, -0.4833,  0.3511,  0.2303, -0.1060,  0.1885,  0.2287,
         0.1827, -0.2201, -0.4090,  0.1034,  0.1254, -0.7286,  0.2786, -0.4356,
         0.1601, -0.2720,  0.4215, -0.8392, -2.4366, -1.8569, -4.2604, -1.5827]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
53 tensor(46.5566, grad_fn=<MseLossBackward0>) tensor([-5.0952e-02, -2.4350e-02,

54 tensor(51.4716, grad_fn=<MseLossBackward0>) tensor([-2.3031e-01,  1.8165e-01, -1.6926e-01,  4.4679e-01, -4.7382e-01,
        -2.3629e-01, -4.2067e-01,  2.7646e-03,  1.0064e-02, -2.3193e-01,
         2.5040e-01,  9.8199e-01, -1.8288e-01,  3.1149e-02, -2.7661e-01,
         2.0761e-01,  4.9711e-01, -1.0008e+00, -5.7320e-01,  4.9896e-01,
        -1.1614e-01,  1.4121e-01, -4.8902e-02, -4.1563e-02, -3.6008e-02,
        -1.9402e-01, -1.7994e-01, -8.9036e-01,  9.2474e-01, -9.7483e-01,
         1.8989e-01, -2.3585e-01,  3.9247e-01, -2.3175e-02,  4.1963e-04,
        -3.1328e-01, -2.1220e+00, -1.6761e+00, -3.2020e+00, -1.4035e+00]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0

56 tensor(52.6346, grad_fn=<MseLossBackward0>) tensor([ 0.0324,  0.0146, -0.0998,  0.3893, -0.3332, -0.2901, -0.3446, -0.0793,
         0.1571, -0.4194,  0.2125,  1.2375, -0.4658,  0.5467,  0.5942,  0.5274,
         0.1081, -1.8170, -0.4652,  0.4214,  0.0997, -0.0467,  0.1348,  0.2015,
         0.1201, -0.2051, -0.3993, -0.2504,  0.4929, -0.8825, -0.1061,  0.2059,
         0.4398,  0.2954, -0.0057, -0.1840, -1.6867, -1.4499, -4.0686, -1.3261]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
56 tensor(55.9117, grad_fn=<MseLossBackward0>) tensor([-0.1367,  0.0723,  0.2448

torch.Size([50, 1])
torch.Size([50, 1])
57 tensor(57.7881, grad_fn=<MseLossBackward0>) tensor([ 0.1180, -0.1608,  0.0573,  0.4532, -0.6506, -0.4613, -0.5968,  0.0656,
         0.0952, -0.1512, -0.2137,  1.0715,  0.0685, -0.6441, -1.3928, -0.7339,
         0.7034, -1.0632, -0.3117,  0.2163, -0.0877, -0.0223,  0.0224,  0.0337,
         0.0300, -0.1995, -0.2968,  0.1634, -0.1488, -0.8258,  0.1696, -0.4593,
         0.2278, -0.3143,  0.2370, -0.1074, -2.6811, -1.7609, -2.0528, -1.0219]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
57 tensor(48.4883, grad_fn=<MseLossBackw

59 tensor(53.2607, grad_fn=<MseLossBackward0>) tensor([ 0.0191, -0.0238, -0.0738,  0.4307, -0.4734, -0.4304, -0.4914,  0.0685,
        -0.1625, -0.0727, -0.1766,  0.6218,  0.2194, -0.5895, -0.1209, -0.5960,
         0.4323, -2.1835, -0.3746,  0.2780,  0.1965, -0.0471,  0.1704,  0.2219,
         0.1486, -0.3160, -0.5000, -0.0172,  0.1347, -0.6282, -0.3691,  0.1746,
         0.5227,  0.1197,  0.0855, -1.0266, -1.9636, -1.3622, -3.9460, -1.2749]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
59 tensor(45.2610, grad_fn=<MseLossBackward0>) tensor([ 0.1354, -0.0807, -0.0078

torch.Size([50, 1])
torch.Size([50, 1])
60 tensor(57.7881, grad_fn=<MseLossBackward0>) tensor([ 0.1180, -0.1608,  0.0573,  0.4532, -0.6506, -0.4613, -0.5968,  0.0656,
         0.0952, -0.1512, -0.2137,  1.0715,  0.0685, -0.6441, -1.3928, -0.7339,
         0.7034, -1.0632, -0.3117,  0.2163, -0.0877, -0.0223,  0.0224,  0.0337,
         0.0300, -0.1995, -0.2968,  0.1634, -0.1488, -0.8258,  0.1696, -0.4593,
         0.2278, -0.3143,  0.2370, -0.1074, -2.6811, -1.7609, -2.0528, -1.0219]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
60 tensor(48.4883, grad_fn=<MseLossBackw

62 tensor(53.2607, grad_fn=<MseLossBackward0>) tensor([ 0.0191, -0.0238, -0.0738,  0.4307, -0.4734, -0.4304, -0.4914,  0.0685,
        -0.1625, -0.0727, -0.1766,  0.6218,  0.2194, -0.5895, -0.1209, -0.5960,
         0.4323, -2.1835, -0.3746,  0.2780,  0.1965, -0.0471,  0.1704,  0.2219,
         0.1486, -0.3160, -0.5000, -0.0172,  0.1347, -0.6282, -0.3691,  0.1746,
         0.5227,  0.1197,  0.0855, -1.0266, -1.9636, -1.3622, -3.9460, -1.2749]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
62 tensor(45.2610, grad_fn=<MseLossBackward0>) tensor([ 0.1354, -0.0807, -0.0078

         0.2278, -0.3143,  0.2370, -0.1074, -2.6811, -1.7609, -2.0528, -1.0219]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
63 tensor(48.4883, grad_fn=<MseLossBackward0>) tensor([-0.0187,  0.0578, -0.0621,  0.3524, -0.4307, -0.2647, -0.4134,  0.1469,
         0.0700, -0.0629, -0.3660,  0.6588, -0.6120,  0.3034,  0.6519,  0.5415,
        -0.0112, -2.9616, -0.4833,  0.3511,  0.2303, -0.1060,  0.1885,  0.2287,
         0.1827, -0.2201, -0.4090,  0.1034,  0.1254, -0.7286,  0.2786, -0.4356,
         0.1601, -0.2720,  0.4215, -0.8392, -2.4366, -1.8569, -4.2604, -1.5827])

65 tensor(45.2610, grad_fn=<MseLossBackward0>) tensor([ 0.1354, -0.0807, -0.0078,  0.3358, -0.2964, -0.2533, -0.3703,  0.1283,
         0.1407, -0.0480,  0.0632,  1.3674, -0.7643,  0.3101, -0.0305,  0.2660,
         1.2382, -0.1131, -0.3213,  0.3047, -0.0589, -0.0235,  0.0912,  0.0793,
         0.0380,  0.0618, -0.2147, -0.2281,  0.3903, -1.4909,  0.3990, -0.1413,
         0.0297, -0.3017, -0.0355, -0.3138, -2.1324, -1.5592, -2.4731, -1.6009]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
65 tensor(44.2423, grad_fn=<MseLossBackward0>) tensor([-1.5409e-01,  1.6859e-01,

         0.2278, -0.3143,  0.2370, -0.1074, -2.6811, -1.7609, -2.0528, -1.0219]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
66 tensor(48.4883, grad_fn=<MseLossBackward0>) tensor([-0.0187,  0.0578, -0.0621,  0.3524, -0.4307, -0.2647, -0.4134,  0.1469,
         0.0700, -0.0629, -0.3660,  0.6588, -0.6120,  0.3034,  0.6519,  0.5415,
        -0.0112, -2.9616, -0.4833,  0.3511,  0.2303, -0.1060,  0.1885,  0.2287,
         0.1827, -0.2201, -0.4090,  0.1034,  0.1254, -0.7286,  0.2786, -0.4356,
         0.1601, -0.2720,  0.4215, -0.8392, -2.4366, -1.8569, -4.2604, -1.5827])

68 tensor(71.4659, grad_fn=<MseLossBackward0>) tensor([-0.1852,  0.1109,  0.2426,  0.4196, -0.4174, -0.2632, -0.5053, -0.1030,
         0.0136, -0.2213, -0.0530,  1.6284, -0.5875,  0.2606,  0.3083,  0.1234,
         0.5326, -0.9398, -0.6080,  0.4980,  0.1745,  0.2382, -0.1140,  0.0131,
        -0.1477, -0.5111, -0.3966, -0.5980,  0.5926, -1.0620, -0.1476,  0.2158,
         0.4501,  0.2965, -0.2892, -0.3197, -1.9300, -1.1134, -3.9411, -1.1400]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
68 tensor(57.7881, grad_fn=<MseLossBackward0>) tensor([ 0.1180, -0.1608,  0.0573

69 tensor(71.4659, grad_fn=<MseLossBackward0>) tensor([-0.1852,  0.1109,  0.2426,  0.4196, -0.4174, -0.2632, -0.5053, -0.1030,
         0.0136, -0.2213, -0.0530,  1.6284, -0.5875,  0.2606,  0.3083,  0.1234,
         0.5326, -0.9398, -0.6080,  0.4980,  0.1745,  0.2382, -0.1140,  0.0131,
        -0.1477, -0.5111, -0.3966, -0.5980,  0.5926, -1.0620, -0.1476,  0.2158,
         0.4501,  0.2965, -0.2892, -0.3197, -1.9300, -1.1134, -3.9411, -1.1400]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
69 tensor(57.7881, grad_fn=<MseLossBackward0>) tensor([ 0.1180, -0.1608,  0.0573

         0.4297,  0.0318,  0.5621, -0.7090, -1.6400, -1.8189, -4.7509, -1.2210]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
70 tensor(51.4716, grad_fn=<MseLossBackward0>) tensor([-2.3031e-01,  1.8165e-01, -1.6926e-01,  4.4679e-01, -4.7382e-01,
        -2.3629e-01, -4.2067e-01,  2.7646e-03,  1.0064e-02, -2.3193e-01,
         2.5040e-01,  9.8199e-01, -1.8288e-01,  3.1149e-02, -2.7661e-01,
         2.0761e-01,  4.9711e-01, -1.0008e+00, -5.7320e-01,  4.9896e-01,
        -1.1614e-01,  1.4121e-01, -4.8902e-02, -4.1563e-02, -3.6008e-02,
        -1.9402e-01, -1.7994e-01, -

         0.0297, -0.3017, -0.0355, -0.3138, -2.1324, -1.5592, -2.4731, -1.6009]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
70 tensor(44.2423, grad_fn=<MseLossBackward0>) tensor([-1.5409e-01,  1.6859e-01, -5.2306e-04,  4.0915e-01, -4.0601e-01,
        -2.7275e-01, -4.2042e-01,  6.9891e-02,  1.7734e-02, -2.9643e-01,
         3.0144e-01,  4.2596e-01, -2.9568e-01,  1.3551e-01,  7.6462e-02,
         8.7127e-02,  5.5483e-01, -8.9083e-01, -4.6558e-01,  4.0750e-01,
        -2.2863e-02, -3.6804e-02,  6.0646e-02,  1.1635e-01,  3.0602e-02,
        -1.2093e-01, -9.2405e-02, -

72 tensor(43.1426, grad_fn=<MseLossBackward0>) tensor([-0.0741,  0.0478, -0.0332,  0.2938, -0.4404, -0.3010, -0.4456,  0.0672,
         0.1188, -0.7313,  0.4216,  0.9954, -0.4385,  0.3495,  0.4329,  0.3372,
         0.2655, -2.1865, -0.4205,  0.3603,  0.3063, -0.0889,  0.0716,  0.1156,
         0.0881,  0.0463, -0.2909,  0.1254,  0.0229, -0.6192, -0.1412, -0.0496,
         0.2373,  0.0473,  0.4117, -0.6123, -2.0320, -1.6228, -3.5221, -1.3218]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
72 tensor(71.4659, grad_fn=<MseLossBackward0>) tensor([-0.1852,  0.1109,  0.2426

         0.0779, -0.1101,  0.2811, -0.6639, -1.9187, -1.4821, -3.6708, -0.8566]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
73 tensor(53.2607, grad_fn=<MseLossBackward0>) tensor([ 0.0191, -0.0238, -0.0738,  0.4307, -0.4734, -0.4304, -0.4914,  0.0685,
        -0.1625, -0.0727, -0.1766,  0.6218,  0.2194, -0.5895, -0.1209, -0.5960,
         0.4323, -2.1835, -0.3746,  0.2780,  0.1965, -0.0471,  0.1704,  0.2219,
         0.1486, -0.3160, -0.5000, -0.0172,  0.1347, -0.6282, -0.3691,  0.1746,
         0.5227,  0.1197,  0.0855, -1.0266, -1.9636, -1.3622, -3.9460, -1.2749])

75 tensor(45.2610, grad_fn=<MseLossBackward0>) tensor([ 0.1354, -0.0807, -0.0078,  0.3358, -0.2964, -0.2533, -0.3703,  0.1283,
         0.1407, -0.0480,  0.0632,  1.3674, -0.7643,  0.3101, -0.0305,  0.2660,
         1.2382, -0.1131, -0.3213,  0.3047, -0.0589, -0.0235,  0.0912,  0.0793,
         0.0380,  0.0618, -0.2147, -0.2281,  0.3903, -1.4909,  0.3990, -0.1413,
         0.0297, -0.3017, -0.0355, -0.3138, -2.1324, -1.5592, -2.4731, -1.6009]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
75 tensor(44.2423, grad_fn=<MseLossBackward0>) tensor([-1.5409e-01,  1.6859e-01,

78 tensor(45.9592, grad_fn=<MseLossBackward0>) tensor([-1.9545e-01,  1.7656e-01,  1.2346e-01,  5.0683e-01, -5.9847e-01,
        -3.7502e-01, -6.2273e-01,  1.8041e-03, -1.5291e-01, -2.2742e-01,
         9.9086e-02,  1.0423e+00, -7.0818e-01,  1.8997e-01,  4.0963e-01,
         1.5935e-01,  8.1212e-01, -1.0495e+00, -5.2108e-01,  4.4709e-01,
         1.3682e-01,  1.8651e-01, -1.7748e-01, -4.7157e-02, -1.9580e-01,
        -8.0014e-02, -4.8935e-01, -4.3408e-01,  6.0513e-01, -1.0842e+00,
         9.5608e-02, -4.3940e-02,  1.0034e-01,  1.5085e-02,  1.5917e-01,
        -2.9437e-01, -2.6589e+00, -1.2610e+00, -3.4484e+00, -1.5088e+00]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0

80 tensor(52.6346, grad_fn=<MseLossBackward0>) tensor([ 0.0324,  0.0146, -0.0998,  0.3893, -0.3332, -0.2901, -0.3446, -0.0793,
         0.1571, -0.4194,  0.2125,  1.2375, -0.4658,  0.5467,  0.5942,  0.5274,
         0.1081, -1.8170, -0.4652,  0.4214,  0.0997, -0.0467,  0.1348,  0.2015,
         0.1201, -0.2051, -0.3993, -0.2504,  0.4929, -0.8825, -0.1061,  0.2059,
         0.4398,  0.2954, -0.0057, -0.1840, -1.6867, -1.4499, -4.0686, -1.3261]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
80 tensor(55.9117, grad_fn=<MseLossBackward0>) tensor([-0.1367,  0.0723,  0.2448

torch.Size([4, 1])
torch.Size([4, 1])
82 tensor(37.2503, grad_fn=<MseLossBackward0>) tensor([ 0.1090, -0.0330, -0.2826,  0.5981, -0.5834, -0.3019, -0.6095, -0.1607,
         0.3040, -0.5040,  0.1017,  0.5342, -1.0234,  1.1540, -0.5992,  1.0991,
         0.7360, -2.5169, -0.2880,  0.2658, -0.1832, -0.0107,  0.1052, -0.2462,
         0.0935,  0.0531, -0.5584,  0.0868, -0.0958, -0.4390, -0.0056,  0.1783,
         0.3861,  0.1182,  0.2300, -2.1106, -2.8580, -3.5361, -3.3137, -1.5430]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
83 tensor(47.0235, grad_fn=<MseLossBackwar

85 tensor(46.5566, grad_fn=<MseLossBackward0>) tensor([-5.0952e-02, -2.4350e-02,  2.0477e-01,  1.7346e-01, -3.1018e-01,
        -2.5375e-01, -3.4807e-01,  1.8455e-01, -2.6644e-02, -3.8329e-01,
        -5.0870e-02,  9.7430e-01, -3.3182e-01, -2.7975e-01, -1.3494e-01,
        -1.9798e-01,  8.3880e-01, -1.9176e+00, -3.5443e-01,  2.6921e-01,
         2.0183e-01, -3.8693e-02,  3.5878e-02,  1.2302e-01,  1.3757e-03,
        -6.6133e-02, -5.2172e-01,  2.6807e-01, -1.5027e-01, -9.0552e-01,
        -3.2231e-01,  1.7175e-01,  5.2815e-01,  2.4943e-01,  3.7001e-01,
        -4.0797e-01, -1.5751e+00, -1.7819e+00, -3.2785e+00, -1.5227e+00]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0

87 tensor(37.2503, grad_fn=<MseLossBackward0>) tensor([ 0.1090, -0.0330, -0.2826,  0.5981, -0.5834, -0.3019, -0.6095, -0.1607,
         0.3040, -0.5040,  0.1017,  0.5342, -1.0234,  1.1540, -0.5992,  1.0991,
         0.7360, -2.5169, -0.2880,  0.2658, -0.1832, -0.0107,  0.1052, -0.2462,
         0.0935,  0.0531, -0.5584,  0.0868, -0.0958, -0.4390, -0.0056,  0.1783,
         0.3861,  0.1182,  0.2300, -2.1106, -2.8580, -3.5361, -3.3137, -1.5430]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
88 tensor(47.0235, grad_fn=<MseLossBackward0>) tensor([ 0.0152,  0.0092, -0.0655

         0.4297,  0.0318,  0.5621, -0.7090, -1.6400, -1.8189, -4.7509, -1.2210]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
90 tensor(51.4716, grad_fn=<MseLossBackward0>) tensor([-2.3031e-01,  1.8165e-01, -1.6926e-01,  4.4679e-01, -4.7382e-01,
        -2.3629e-01, -4.2067e-01,  2.7646e-03,  1.0064e-02, -2.3193e-01,
         2.5040e-01,  9.8199e-01, -1.8288e-01,  3.1149e-02, -2.7661e-01,
         2.0761e-01,  4.9711e-01, -1.0008e+00, -5.7320e-01,  4.9896e-01,
        -1.1614e-01,  1.4121e-01, -4.8902e-02, -4.1563e-02, -3.6008e-02,
        -1.9402e-01, -1.7994e-01, -

92 tensor(57.7881, grad_fn=<MseLossBackward0>) tensor([ 0.1180, -0.1608,  0.0573,  0.4532, -0.6506, -0.4613, -0.5968,  0.0656,
         0.0952, -0.1512, -0.2137,  1.0715,  0.0685, -0.6441, -1.3928, -0.7339,
         0.7034, -1.0632, -0.3117,  0.2163, -0.0877, -0.0223,  0.0224,  0.0337,
         0.0300, -0.1995, -0.2968,  0.1634, -0.1488, -0.8258,  0.1696, -0.4593,
         0.2278, -0.3143,  0.2370, -0.1074, -2.6811, -1.7609, -2.0528, -1.0219]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
92 tensor(48.4883, grad_fn=<MseLossBackward0>) tensor([-0.0187,  0.0578, -0.0621

torch.Size([50, 1])
torch.Size([50, 1])
95 tensor(52.0178, grad_fn=<MseLossBackward0>) tensor([-0.0244,  0.0097,  0.0729,  0.2128, -0.2789, -0.2013, -0.2983,  0.1257,
         0.0774, -0.8785,  0.3474,  2.2168, -0.5825,  0.6776,  0.7346,  0.5819,
        -0.0248, -2.5566, -0.5707,  0.3934,  0.3354, -0.0720,  0.1533,  0.2485,
         0.1621, -0.2508, -0.4534, -0.5423,  0.4441, -0.3449, -0.2889, -0.0327,
         0.4297,  0.0318,  0.5621, -0.7090, -1.6400, -1.8189, -4.7509, -1.2210]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
95 tensor(51.4716, grad_fn=<MseLossBackw

torch.Size([50, 1])
torch.Size([50, 1])
97 tensor(52.6346, grad_fn=<MseLossBackward0>) tensor([ 0.0324,  0.0146, -0.0998,  0.3893, -0.3332, -0.2901, -0.3446, -0.0793,
         0.1571, -0.4194,  0.2125,  1.2375, -0.4658,  0.5467,  0.5942,  0.5274,
         0.1081, -1.8170, -0.4652,  0.4214,  0.0997, -0.0467,  0.1348,  0.2015,
         0.1201, -0.2051, -0.3993, -0.2504,  0.4929, -0.8825, -0.1061,  0.2059,
         0.4398,  0.2954, -0.0057, -0.1840, -1.6867, -1.4499, -4.0686, -1.3261]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
97 tensor(55.9117, grad_fn=<MseLossBackw

99 tensor(45.2610, grad_fn=<MseLossBackward0>) tensor([ 0.1354, -0.0807, -0.0078,  0.3358, -0.2964, -0.2533, -0.3703,  0.1283,
         0.1407, -0.0480,  0.0632,  1.3674, -0.7643,  0.3101, -0.0305,  0.2660,
         1.2382, -0.1131, -0.3213,  0.3047, -0.0589, -0.0235,  0.0912,  0.0793,
         0.0380,  0.0618, -0.2147, -0.2281,  0.3903, -1.4909,  0.3990, -0.1413,
         0.0297, -0.3017, -0.0355, -0.3138, -2.1324, -1.5592, -2.4731, -1.6009]) tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
99 tensor(44.2423, grad_fn=<MseLossBackward0>) tensor([-1.5409e-01,  1.6859e-01,

In [12]:
parameters_to_vector(model.parameters())

tensor([-0.2042, -0.2578, -0.0603, -0.2144,  0.1683, -0.1041,  0.1239, -0.2471,
        -0.1132, -0.2566, -0.2238, -0.0980,  0.0091, -0.0050,  0.3077, -0.3188,
        -0.2549,  0.2537,  0.1664, -0.3067, -0.1337, -0.1716, -0.2813,  0.0414,
        -0.1889, -0.0123,  0.2228, -0.2023, -0.2683,  0.2724, -0.0689,  0.0237,
        -0.1276, -0.0920, -0.2745,  0.1189,  0.0916,  0.3737,  0.0862,  0.3118],
       grad_fn=<CatBackward0>)

In [13]:
class CNN_Net(nn.Module):
    def __init__(self):
        super(CNN_Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5)
        self.pool = nn.MaxPool2d(2,2)
        self.dropout = nn.Dropout(p=0.2)
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.dropout(x)
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        output = self.fc2(x)
        return output

In [14]:
class ClientUpdate(object):
    def __init__(self, dataset, batchSize, alpha, lamda, epochs, projection_list, projected_weights):
        self.train_loader = DataLoader(MyDataset(dataset["features"], dataset["label"]), batch_size=batchSize, shuffle=True)
        #self.learning_rate = learning_rate
        self.epochs = epochs
        self.batchSize = batchSize

    def train(self, model):
        criterion = nn.MSELoss()
        optimizer = torch.optim.SGD(model.parameters(), lr=1e-3
                                   )

        e_loss = []
        for epoch in range(1, self.epochs+1):
            train_loss = 0
            model.train()
            for i, (data, labels) in zip(range(1), self.train_loader):
                data, labels = data, labels
                optimizer.zero_grad() 
                output = model(data)  
                loss = criterion(output, labels)
                #loss += mu/2 * torch.norm(client_param.data - server_param.data)**2
                loss.backward()
                #grads = grads_to_vector(model.parameters())
                optimizer.step()
                train_loss += loss.item()*data.size(0)
                
                

            train_loss = train_loss/self.batchSize#len(self.train_loader.dataset) 
            e_loss.append(train_loss)

        total_loss = e_loss#sum(e_loss)/len(e_loss)

        return model.state_dict(), total_loss

In [15]:
# Preparing projection matrices
models = [MLP_Net(user_id=i) for i in range(no_users)]
#temp = MLP_Net()
projection_list = []
projected_weights = []

def update_ProjWeight(projection_list, projected_weights, first_run=True):
    #projected_weights = []
    for i in range(no_users):
        neighbors_mat = []
        neighbors_weights = []
        for j in range(no_users):
            if j in G.neighbors(i):
                with torch.no_grad():
                    if first_run == True:
                        row, column = parameters_to_vector(models[j].parameters()).size()[0], parameters_to_vector(models[i].parameters()).size()[0]
                        mat = torch.zeros((row, column))
                        mat.fill_diagonal_(1.0 + 1.0 * float(np.random.randn(1)))
                        neighbors_mat.append(mat)
                        neighbors_weights.append(torch.matmul(mat, parameters_to_vector(models[j].parameters())))
                    else:
                        neighbors_weights.append(torch.matmul(projection_list[j][i], parameters_to_vector(models[j].parameters())))
            else:
                neighbors_mat.append(0)
                neighbors_weights.append(0)
        if first_run == True:
            projection_list.append(neighbors_mat)
        projected_weights.append(neighbors_weights)

update_ProjWeight(projection_list, projected_weights)



In [16]:
print(projection_list[0])

[0, tensor([[1.0529, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 1.0529, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 1.0529,  ..., 0.0000, 0.0000, 0.0000],
        ...,
        [0.0000, 0.0000, 0.0000,  ..., 1.0529, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 1.0529, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 1.0529]]), 0, tensor([[1.2510, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 1.2510, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 1.2510,  ..., 0.0000, 0.0000, 0.0000],
        ...,
        [0.0000, 0.0000, 0.0000,  ..., 1.2510, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 1.2510, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 1.2510]]), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


In [17]:
def testing(model, dataset, bs, criterion): 
    test_loss = 0
    correct = 0
    test_loader = DataLoader(MyDataset(X_test, y_test), batch_size=bs)
    l = len(test_loader)
    model.eval()
    for data, labels in test_loader:
        data, labels = data, labels
        output = model(data)
        loss = criterion(output, labels)
        test_loss += loss.item()*data.size(0)
        #_, pred = torch.max(output, 1)
        #correct += pred.eq(labels.data.view_as(pred)).sum().item()
    
    test_loss /= len(test_loader.dataset)
    
    return test_loss

In [18]:
def rel_error(model):
    return (torch.norm(parameters_to_vector(model.parameters()) - datapoints[model.user_id]['exact_weights']) / torch.norm(datapoints[model.user_id]['exact_weights'])).detach()

In [19]:
model = MLP_Net(user_id=0)

from torch.nn.utils import parameters_to_vector, vector_to_parameters

with torch.no_grad():    
    params = parameters_to_vector(model.parameters())

    print(params)

params *= 2.

vector_to_parameters(parameters=model.parameters(), vec=params)

parameters_to_vector(model.parameters())





tensor([-0.0016,  0.0798,  0.1991, -0.2646,  0.1492, -0.1215,  0.2237, -0.1344,
        -0.2093,  0.3116, -0.0348, -0.1772, -0.1844, -0.0705, -0.2113,  0.2130,
        -0.1988,  0.1714,  0.1038, -0.1824,  0.1971,  0.2693, -0.1247, -0.0079,
         0.0953,  0.2047, -0.0031, -0.2782, -0.1369, -0.0619,  0.2180,  0.1150,
         0.3067,  0.1449, -0.0419,  0.2904, -0.0664,  0.4803,  0.1630,  0.1145])


tensor([-0.0033,  0.1597,  0.3982, -0.5293,  0.2984, -0.2430,  0.4475, -0.2687,
        -0.4186,  0.6232, -0.0696, -0.3543, -0.3689, -0.1409, -0.4226,  0.4259,
        -0.3976,  0.3429,  0.2077, -0.3648,  0.3941,  0.5387, -0.2494, -0.0157,
         0.1906,  0.4094, -0.0061, -0.5564, -0.2738, -0.1238,  0.4359,  0.2299,
         0.6134,  0.2899, -0.0839,  0.5808, -0.1329,  0.9607,  0.3261,  0.2290],
       grad_fn=<CatBackward0>)

In [20]:
#global_model = CNN_Net().cuda()
models = [MLP_Net(user_id=i) for i in range(no_users)]
dummy_models = [MLP_Net(user_id=i) for i in range(no_users)]

#model.load_state_dict(global_model.state_dict())

criterion = nn.MSELoss()


train_loss = []
test_loss = []
test_accuracy = []
total_rel_error = []

for curr_round in tqdm(range(1, it+1)):
    w, local_loss = [], []

    
    for i in range(no_users):
        dummy_models[i].load_state_dict(models[i].state_dict())
        local_update = ClientUpdate(dataset=datapoints[i], batchSize=batch_size, alpha=alpha, lamda=lamda, epochs=1, projection_list=projection_list, projected_weights=projected_weights)
        weights, loss = local_update.train(dummy_models[i])
        w.append(weights)
        local_loss.append(loss)
        models[i].load_state_dict(w[i])
        
    
    
    
    #Share and mix the local weights
    
    
    for i in range(no_users):
        weights = parameters_to_vector(dummy_models[i].parameters())
        mat_vec_sum = torch.zeros_like(weights)
        for j in G.neighbors(i):
            mat_vec_sum = torch.add(mat_vec_sum, parameters_to_vector(dummy_models[j].parameters()))
        
        new_weights = weights - mu * eta * (degree_list[i] * weights - mat_vec_sum)
        # Update real models
        vector_to_parameters(parameters=models[i].parameters(), vec=new_weights)
        
        
        
    
    




          
            

    local_test_acc = []
    local_test_loss = []
    
    user_rel_error = 0
    for k in range(no_users):
      
        g_loss = testing(models[i], datapoints[i], 50, criterion)
        local_test_loss.append(g_loss)
        #user_rel_error += rel_error(models[i])
    
    
        

    g_loss = sum(local_test_loss) / len(local_test_loss)
    #total_rel_error.append(user_rel_error / no_users)
    #g_accuracy = sum(local_test_acc) / len(local_test_acc)
    
    

    test_loss.append(g_loss)
    #test_accuracy.append(g_accuracy)
    print("Training_loss %2.5f"% (test_loss[-1]))

  0%|          | 1/2000 [00:02<1:14:26,  2.23s/it]

Training_loss 47.50122


  0%|          | 2/2000 [00:04<1:05:20,  1.96s/it]

Training_loss 47.47722


  0%|          | 3/2000 [00:05<58:49,  1.77s/it]  

Training_loss 47.45194


  0%|          | 4/2000 [00:06<53:36,  1.61s/it]

Training_loss 47.42368


  0%|          | 5/2000 [00:08<51:34,  1.55s/it]

Training_loss 47.40134


  0%|          | 6/2000 [00:09<47:03,  1.42s/it]

Training_loss 47.36980


  0%|          | 7/2000 [00:10<44:34,  1.34s/it]

Training_loss 47.34242


  0%|          | 8/2000 [00:11<42:45,  1.29s/it]

Training_loss 47.31447


  0%|          | 9/2000 [00:13<46:45,  1.41s/it]

Training_loss 47.28590


  0%|          | 10/2000 [00:17<1:11:03,  2.14s/it]

Training_loss 47.25982


  1%|          | 11/2000 [00:20<1:21:46,  2.47s/it]

Training_loss 47.22600


  1%|          | 12/2000 [00:21<1:10:08,  2.12s/it]

Training_loss 47.18830


  1%|          | 13/2000 [00:23<1:01:08,  1.85s/it]

Training_loss 47.15880


  1%|          | 14/2000 [00:24<54:13,  1.64s/it]  

Training_loss 47.13019


  1%|          | 15/2000 [00:25<49:19,  1.49s/it]

Training_loss 47.10204


  1%|          | 16/2000 [00:26<46:08,  1.40s/it]

Training_loss 47.07518


  1%|          | 17/2000 [00:27<42:56,  1.30s/it]

Training_loss 47.04780


  1%|          | 18/2000 [00:28<42:47,  1.30s/it]

Training_loss 47.02574


  1%|          | 19/2000 [00:30<41:26,  1.25s/it]

Training_loss 47.00089


  1%|          | 20/2000 [00:32<49:53,  1.51s/it]

Training_loss 46.95987


  1%|          | 21/2000 [00:35<1:02:57,  1.91s/it]

Training_loss 46.92978


  1%|          | 22/2000 [00:38<1:14:29,  2.26s/it]

Training_loss 46.89665


  1%|          | 23/2000 [00:40<1:19:30,  2.41s/it]

Training_loss 46.86752


  1%|          | 24/2000 [00:42<1:12:54,  2.21s/it]

Training_loss 46.83750


  1%|▏         | 25/2000 [00:44<1:10:22,  2.14s/it]

Training_loss 46.81230


  1%|▏         | 26/2000 [00:45<1:02:11,  1.89s/it]

Training_loss 46.77620


  1%|▏         | 27/2000 [00:47<55:26,  1.69s/it]  

Training_loss 46.74307


  1%|▏         | 28/2000 [00:48<50:20,  1.53s/it]

Training_loss 46.71582


  1%|▏         | 29/2000 [00:49<46:37,  1.42s/it]

Training_loss 46.68372


  2%|▏         | 30/2000 [00:50<46:43,  1.42s/it]

Training_loss 46.64638


  2%|▏         | 31/2000 [00:52<44:12,  1.35s/it]

Training_loss 46.61412


  2%|▏         | 32/2000 [00:53<42:41,  1.30s/it]

Training_loss 46.58105


  2%|▏         | 33/2000 [00:54<44:14,  1.35s/it]

Training_loss 46.54498


  2%|▏         | 34/2000 [00:56<44:49,  1.37s/it]

Training_loss 46.51389


  2%|▏         | 35/2000 [00:57<43:46,  1.34s/it]

Training_loss 46.47924


  2%|▏         | 36/2000 [00:58<42:15,  1.29s/it]

Training_loss 46.45482


  2%|▏         | 37/2000 [01:00<44:55,  1.37s/it]

Training_loss 46.42862


  2%|▏         | 38/2000 [01:01<42:40,  1.31s/it]

Training_loss 46.39764


  2%|▏         | 39/2000 [01:02<41:28,  1.27s/it]

Training_loss 46.35492


  2%|▏         | 40/2000 [01:03<39:33,  1.21s/it]

Training_loss 46.31465


  2%|▏         | 41/2000 [01:04<41:35,  1.27s/it]

Training_loss 46.27888


  2%|▏         | 42/2000 [01:06<43:02,  1.32s/it]

Training_loss 46.23637


  2%|▏         | 43/2000 [01:07<41:39,  1.28s/it]

Training_loss 46.20146


  2%|▏         | 44/2000 [01:08<40:33,  1.24s/it]

Training_loss 46.17178


  2%|▏         | 45/2000 [01:09<38:25,  1.18s/it]

Training_loss 46.13396


  2%|▏         | 46/2000 [01:10<37:27,  1.15s/it]

Training_loss 46.10149


  2%|▏         | 47/2000 [01:11<36:30,  1.12s/it]

Training_loss 46.06724


  2%|▏         | 48/2000 [01:12<36:18,  1.12s/it]

Training_loss 46.02667


  2%|▏         | 49/2000 [01:14<35:51,  1.10s/it]

Training_loss 45.98691


  2%|▎         | 50/2000 [01:15<35:05,  1.08s/it]

Training_loss 45.95250


  3%|▎         | 51/2000 [01:16<34:51,  1.07s/it]

Training_loss 45.90993


  3%|▎         | 52/2000 [01:17<35:00,  1.08s/it]

Training_loss 45.87055


  3%|▎         | 53/2000 [01:18<35:15,  1.09s/it]

Training_loss 45.83893


  3%|▎         | 54/2000 [01:19<35:28,  1.09s/it]

Training_loss 45.79618


  3%|▎         | 55/2000 [01:21<45:11,  1.39s/it]

Training_loss 45.75651


  3%|▎         | 56/2000 [01:24<58:32,  1.81s/it]

Training_loss 45.70696


  3%|▎         | 57/2000 [01:26<59:11,  1.83s/it]

Training_loss 45.66443


  3%|▎         | 58/2000 [01:27<53:33,  1.65s/it]

Training_loss 45.62149


  3%|▎         | 59/2000 [01:28<49:26,  1.53s/it]

Training_loss 45.58269


  3%|▎         | 60/2000 [01:30<48:06,  1.49s/it]

Training_loss 45.53297


  3%|▎         | 61/2000 [01:31<47:57,  1.48s/it]

Training_loss 45.49160


  3%|▎         | 62/2000 [01:33<48:17,  1.49s/it]

Training_loss 45.45081


  3%|▎         | 63/2000 [01:34<50:23,  1.56s/it]

Training_loss 45.40187


  3%|▎         | 64/2000 [01:36<51:09,  1.59s/it]

Training_loss 45.34900


  3%|▎         | 65/2000 [01:37<49:59,  1.55s/it]

Training_loss 45.29430


  3%|▎         | 66/2000 [01:39<47:43,  1.48s/it]

Training_loss 45.25715


  3%|▎         | 67/2000 [01:40<43:38,  1.35s/it]

Training_loss 45.21195


  3%|▎         | 68/2000 [01:41<43:46,  1.36s/it]

Training_loss 45.15655


  3%|▎         | 69/2000 [01:43<50:01,  1.55s/it]

Training_loss 45.13229


  4%|▎         | 70/2000 [01:45<54:45,  1.70s/it]

Training_loss 45.07688


  4%|▎         | 71/2000 [01:47<51:50,  1.61s/it]

Training_loss 45.02757


  4%|▎         | 72/2000 [01:49<1:00:15,  1.88s/it]

Training_loss 44.98476


  4%|▎         | 73/2000 [01:51<58:09,  1.81s/it]  

Training_loss 44.94724


  4%|▎         | 74/2000 [01:53<1:00:28,  1.88s/it]

Training_loss 44.90622


  4%|▍         | 75/2000 [01:54<56:42,  1.77s/it]  

Training_loss 44.84074


  4%|▍         | 76/2000 [01:56<51:31,  1.61s/it]

Training_loss 44.80746


  4%|▍         | 77/2000 [01:57<46:48,  1.46s/it]

Training_loss 44.74926


  4%|▍         | 78/2000 [01:58<45:33,  1.42s/it]

Training_loss 44.71129


  4%|▍         | 79/2000 [01:59<45:36,  1.42s/it]

Training_loss 44.65209


  4%|▍         | 80/2000 [02:01<43:31,  1.36s/it]

Training_loss 44.60096


  4%|▍         | 81/2000 [02:02<43:17,  1.35s/it]

Training_loss 44.54455


  4%|▍         | 82/2000 [02:03<43:44,  1.37s/it]

Training_loss 44.49122


  4%|▍         | 83/2000 [02:05<42:27,  1.33s/it]

Training_loss 44.44430


  4%|▍         | 84/2000 [02:06<42:04,  1.32s/it]

Training_loss 44.38642


  4%|▍         | 85/2000 [02:07<42:42,  1.34s/it]

Training_loss 44.31814


  4%|▍         | 86/2000 [02:09<47:44,  1.50s/it]

Training_loss 44.26517


  4%|▍         | 87/2000 [02:11<47:13,  1.48s/it]

Training_loss 44.22431


  4%|▍         | 88/2000 [02:12<43:10,  1.35s/it]

Training_loss 44.16751


  4%|▍         | 89/2000 [02:13<42:18,  1.33s/it]

Training_loss 44.11950


  4%|▍         | 90/2000 [02:14<44:10,  1.39s/it]

Training_loss 44.06384


  5%|▍         | 91/2000 [02:16<43:54,  1.38s/it]

Training_loss 44.01513


  5%|▍         | 92/2000 [02:17<42:59,  1.35s/it]

Training_loss 43.96999


  5%|▍         | 93/2000 [02:18<40:15,  1.27s/it]

Training_loss 43.92140


  5%|▍         | 94/2000 [02:19<40:10,  1.26s/it]

Training_loss 43.83853


  5%|▍         | 95/2000 [02:21<41:27,  1.31s/it]

Training_loss 43.77061


  5%|▍         | 96/2000 [02:22<42:01,  1.32s/it]

Training_loss 43.71646


  5%|▍         | 97/2000 [02:23<41:08,  1.30s/it]

Training_loss 43.66171


  5%|▍         | 98/2000 [02:25<41:03,  1.30s/it]

Training_loss 43.59085


  5%|▍         | 99/2000 [02:27<54:21,  1.72s/it]

Training_loss 43.51114


  5%|▌         | 100/2000 [02:29<52:41,  1.66s/it]

Training_loss 43.44115


  5%|▌         | 101/2000 [02:31<52:49,  1.67s/it]

Training_loss 43.38759


  5%|▌         | 102/2000 [02:33<55:24,  1.75s/it]

Training_loss 43.33112


  5%|▌         | 103/2000 [02:36<1:13:19,  2.32s/it]

Training_loss 43.26510


  5%|▌         | 104/2000 [02:39<1:18:24,  2.48s/it]

Training_loss 43.21849


  5%|▌         | 105/2000 [02:41<1:08:22,  2.17s/it]

Training_loss 43.14287


  5%|▌         | 106/2000 [02:42<1:01:57,  1.96s/it]

Training_loss 43.09325


  5%|▌         | 107/2000 [02:44<58:18,  1.85s/it]  

Training_loss 43.03370


  5%|▌         | 108/2000 [02:45<53:55,  1.71s/it]

Training_loss 42.97876


  5%|▌         | 109/2000 [02:47<52:32,  1.67s/it]

Training_loss 42.90933


  6%|▌         | 110/2000 [02:48<48:12,  1.53s/it]

Training_loss 42.82350


  6%|▌         | 111/2000 [02:50<52:33,  1.67s/it]

Training_loss 42.76266


  6%|▌         | 112/2000 [02:51<50:14,  1.60s/it]

Training_loss 42.69984


  6%|▌         | 113/2000 [02:52<46:52,  1.49s/it]

Training_loss 42.62981


  6%|▌         | 114/2000 [02:54<44:17,  1.41s/it]

Training_loss 42.54180


  6%|▌         | 115/2000 [02:55<43:07,  1.37s/it]

Training_loss 42.48498


  6%|▌         | 116/2000 [02:57<47:07,  1.50s/it]

Training_loss 42.42182


  6%|▌         | 117/2000 [03:01<1:09:05,  2.20s/it]

Training_loss 42.33910


  6%|▌         | 118/2000 [03:03<1:09:02,  2.20s/it]

Training_loss 42.28146


  6%|▌         | 119/2000 [03:05<1:05:32,  2.09s/it]

Training_loss 42.18621


  6%|▌         | 120/2000 [03:06<1:01:33,  1.96s/it]

Training_loss 42.09888


  6%|▌         | 121/2000 [03:08<57:26,  1.83s/it]  

Training_loss 42.04027


  6%|▌         | 122/2000 [03:10<56:48,  1.81s/it]

Training_loss 41.99609


  6%|▌         | 123/2000 [03:11<57:43,  1.85s/it]

Training_loss 41.92156


  6%|▌         | 124/2000 [03:14<59:40,  1.91s/it]

Training_loss 41.79834


  6%|▋         | 125/2000 [03:15<58:13,  1.86s/it]

Training_loss 41.71199


  6%|▋         | 126/2000 [03:17<52:19,  1.68s/it]

Training_loss 41.64640


  6%|▋         | 127/2000 [03:18<46:45,  1.50s/it]

Training_loss 41.58348


  6%|▋         | 128/2000 [03:19<42:12,  1.35s/it]

Training_loss 41.48789


  6%|▋         | 129/2000 [03:20<39:11,  1.26s/it]

Training_loss 41.41399


  6%|▋         | 130/2000 [03:21<37:09,  1.19s/it]

Training_loss 41.34046


  7%|▋         | 131/2000 [03:22<36:36,  1.18s/it]

Training_loss 41.27782


  7%|▋         | 132/2000 [03:23<34:37,  1.11s/it]

Training_loss 41.18521


  7%|▋         | 133/2000 [03:24<34:50,  1.12s/it]

Training_loss 41.08309


  7%|▋         | 134/2000 [03:25<36:07,  1.16s/it]

Training_loss 40.97290


  7%|▋         | 135/2000 [03:26<35:37,  1.15s/it]

Training_loss 40.87214


  7%|▋         | 136/2000 [03:27<34:48,  1.12s/it]

Training_loss 40.79117


  7%|▋         | 137/2000 [03:29<35:26,  1.14s/it]

Training_loss 40.72107


  7%|▋         | 138/2000 [03:30<41:03,  1.32s/it]

Training_loss 40.63484


  7%|▋         | 139/2000 [03:35<1:08:53,  2.22s/it]

Training_loss 40.51239


  7%|▋         | 140/2000 [03:36<1:01:21,  1.98s/it]

Training_loss 40.45414


  7%|▋         | 141/2000 [03:37<53:10,  1.72s/it]  

Training_loss 40.37059


  7%|▋         | 142/2000 [03:38<47:18,  1.53s/it]

Training_loss 40.26216


  7%|▋         | 143/2000 [03:39<43:18,  1.40s/it]

Training_loss 40.15592


  7%|▋         | 144/2000 [03:40<40:20,  1.30s/it]

Training_loss 40.07136


  7%|▋         | 145/2000 [03:42<38:50,  1.26s/it]

Training_loss 39.98586


  7%|▋         | 146/2000 [03:43<37:20,  1.21s/it]

Training_loss 39.89648


  7%|▋         | 147/2000 [03:44<36:38,  1.19s/it]

Training_loss 39.83929


  7%|▋         | 148/2000 [03:45<35:01,  1.13s/it]

Training_loss 39.75480


  7%|▋         | 149/2000 [03:46<35:11,  1.14s/it]

Training_loss 39.66189


  8%|▊         | 150/2000 [03:47<35:02,  1.14s/it]

Training_loss 39.55683


  8%|▊         | 151/2000 [03:50<51:18,  1.66s/it]

Training_loss 39.47704


  8%|▊         | 152/2000 [03:53<59:25,  1.93s/it]

Training_loss 39.39407


  8%|▊         | 153/2000 [03:56<1:12:25,  2.35s/it]

Training_loss 39.29597


  8%|▊         | 154/2000 [03:57<1:05:14,  2.12s/it]

Training_loss 39.20920


  8%|▊         | 155/2000 [03:59<58:35,  1.91s/it]  

Training_loss 39.09684


  8%|▊         | 156/2000 [04:00<52:11,  1.70s/it]

Training_loss 39.00222


  8%|▊         | 157/2000 [04:01<48:34,  1.58s/it]

Training_loss 38.88614


  8%|▊         | 158/2000 [04:03<46:00,  1.50s/it]

Training_loss 38.79460


  8%|▊         | 159/2000 [04:04<42:00,  1.37s/it]

Training_loss 38.69092


  8%|▊         | 160/2000 [04:05<38:52,  1.27s/it]

Training_loss 38.60470


  8%|▊         | 161/2000 [04:06<37:08,  1.21s/it]

Training_loss 38.49851


  8%|▊         | 162/2000 [04:07<35:36,  1.16s/it]

Training_loss 38.39451


  8%|▊         | 163/2000 [04:08<33:50,  1.11s/it]

Training_loss 38.28444


  8%|▊         | 164/2000 [04:09<33:00,  1.08s/it]

Training_loss 38.20016


  8%|▊         | 165/2000 [04:10<32:46,  1.07s/it]

Training_loss 38.09667


  8%|▊         | 166/2000 [04:11<33:16,  1.09s/it]

Training_loss 38.01736


  8%|▊         | 167/2000 [04:12<32:48,  1.07s/it]

Training_loss 37.92019


  8%|▊         | 168/2000 [04:13<32:20,  1.06s/it]

Training_loss 37.82261


  8%|▊         | 169/2000 [04:14<31:58,  1.05s/it]

Training_loss 37.74913


  8%|▊         | 170/2000 [04:15<33:04,  1.08s/it]

Training_loss 37.66148


  9%|▊         | 171/2000 [04:16<31:50,  1.04s/it]

Training_loss 37.59764


  9%|▊         | 172/2000 [04:18<34:33,  1.13s/it]

Training_loss 37.47137


  9%|▊         | 173/2000 [04:23<1:16:14,  2.50s/it]

Training_loss 37.38581


  9%|▊         | 174/2000 [04:27<1:29:16,  2.93s/it]

Training_loss 37.27895


  9%|▉         | 175/2000 [04:28<1:13:46,  2.43s/it]

Training_loss 37.18491


  9%|▉         | 176/2000 [04:30<1:00:50,  2.00s/it]

Training_loss 37.03078


  9%|▉         | 177/2000 [04:31<52:49,  1.74s/it]  

Training_loss 36.90646


  9%|▉         | 178/2000 [04:32<45:44,  1.51s/it]

Training_loss 36.80614


  9%|▉         | 179/2000 [04:33<41:38,  1.37s/it]

Training_loss 36.72405


  9%|▉         | 180/2000 [04:34<40:47,  1.34s/it]

Training_loss 36.60846


  9%|▉         | 181/2000 [04:35<38:52,  1.28s/it]

Training_loss 36.51982


  9%|▉         | 182/2000 [04:36<36:54,  1.22s/it]

Training_loss 36.42814


  9%|▉         | 183/2000 [04:37<35:47,  1.18s/it]

Training_loss 36.33465


  9%|▉         | 184/2000 [04:38<34:32,  1.14s/it]

Training_loss 36.22924


  9%|▉         | 185/2000 [04:39<34:34,  1.14s/it]

Training_loss 36.13268


  9%|▉         | 186/2000 [04:40<32:53,  1.09s/it]

Training_loss 36.02564


  9%|▉         | 187/2000 [04:41<32:22,  1.07s/it]

Training_loss 35.91573


  9%|▉         | 188/2000 [04:43<33:20,  1.10s/it]

Training_loss 35.84924


  9%|▉         | 189/2000 [04:44<32:40,  1.08s/it]

Training_loss 35.78822


 10%|▉         | 190/2000 [04:45<32:28,  1.08s/it]

Training_loss 35.70016


 10%|▉         | 191/2000 [04:46<32:08,  1.07s/it]

Training_loss 35.61227


 10%|▉         | 192/2000 [04:47<32:23,  1.08s/it]

Training_loss 35.52122


 10%|▉         | 193/2000 [04:48<32:48,  1.09s/it]

Training_loss 35.41811


 10%|▉         | 194/2000 [04:49<32:29,  1.08s/it]

Training_loss 35.30638


 10%|▉         | 195/2000 [04:50<34:22,  1.14s/it]

Training_loss 35.22697


 10%|▉         | 196/2000 [04:51<33:27,  1.11s/it]

Training_loss 35.13995


 10%|▉         | 197/2000 [04:52<32:29,  1.08s/it]

Training_loss 35.05042


 10%|▉         | 198/2000 [04:53<32:03,  1.07s/it]

Training_loss 34.94357


 10%|▉         | 199/2000 [04:55<32:48,  1.09s/it]

Training_loss 34.81896


 10%|█         | 200/2000 [04:56<31:54,  1.06s/it]

Training_loss 34.70563


 10%|█         | 201/2000 [04:57<32:20,  1.08s/it]

Training_loss 34.61097


 10%|█         | 202/2000 [04:58<31:34,  1.05s/it]

Training_loss 34.49183


 10%|█         | 203/2000 [04:59<31:06,  1.04s/it]

Training_loss 34.38367


 10%|█         | 204/2000 [05:00<29:56,  1.00s/it]

Training_loss 34.28548


 10%|█         | 205/2000 [05:01<29:59,  1.00s/it]

Training_loss 34.21475


 10%|█         | 206/2000 [05:02<30:28,  1.02s/it]

Training_loss 34.10363


 10%|█         | 207/2000 [05:03<30:48,  1.03s/it]

Training_loss 34.02384


 10%|█         | 208/2000 [05:04<31:12,  1.04s/it]

Training_loss 33.91821


 10%|█         | 209/2000 [05:05<30:18,  1.02s/it]

Training_loss 33.80422


 10%|█         | 210/2000 [05:10<1:07:37,  2.27s/it]

Training_loss 33.68533


 11%|█         | 211/2000 [05:11<59:37,  2.00s/it]  

Training_loss 33.60929


 11%|█         | 212/2000 [05:13<52:43,  1.77s/it]

Training_loss 33.53476


 11%|█         | 213/2000 [05:14<48:27,  1.63s/it]

Training_loss 33.47795


 11%|█         | 214/2000 [05:16<51:18,  1.72s/it]

Training_loss 33.39180


 11%|█         | 215/2000 [05:17<47:45,  1.61s/it]

Training_loss 33.31331


 11%|█         | 216/2000 [05:19<49:21,  1.66s/it]

Training_loss 33.19834


 11%|█         | 217/2000 [05:20<47:56,  1.61s/it]

Training_loss 33.12982


 11%|█         | 218/2000 [05:22<48:29,  1.63s/it]

Training_loss 33.07505


 11%|█         | 219/2000 [05:23<45:08,  1.52s/it]

Training_loss 32.96935


 11%|█         | 220/2000 [05:24<40:50,  1.38s/it]

Training_loss 32.88398


 11%|█         | 221/2000 [05:25<37:04,  1.25s/it]

Training_loss 32.81530


 11%|█         | 222/2000 [05:27<36:53,  1.24s/it]

Training_loss 32.72483


 11%|█         | 223/2000 [05:28<38:35,  1.30s/it]

Training_loss 32.64508


 11%|█         | 224/2000 [05:30<42:11,  1.43s/it]

Training_loss 32.59123


 11%|█▏        | 225/2000 [05:31<42:39,  1.44s/it]

Training_loss 32.47421


 11%|█▏        | 226/2000 [05:32<40:59,  1.39s/it]

Training_loss 32.39656


 11%|█▏        | 227/2000 [05:33<37:45,  1.28s/it]

Training_loss 32.31168


 11%|█▏        | 228/2000 [05:35<37:02,  1.25s/it]

Training_loss 32.18304


 11%|█▏        | 229/2000 [05:36<36:17,  1.23s/it]

Training_loss 32.08999


 12%|█▏        | 230/2000 [05:37<36:03,  1.22s/it]

Training_loss 32.00794


 12%|█▏        | 231/2000 [05:38<36:41,  1.24s/it]

Training_loss 31.96462


 12%|█▏        | 232/2000 [05:40<39:51,  1.35s/it]

Training_loss 31.84385


 12%|█▏        | 233/2000 [05:41<39:19,  1.34s/it]

Training_loss 31.75634


 12%|█▏        | 234/2000 [05:43<41:44,  1.42s/it]

Training_loss 31.69437


 12%|█▏        | 235/2000 [05:44<39:53,  1.36s/it]

Training_loss 31.60371


 12%|█▏        | 236/2000 [05:45<37:19,  1.27s/it]

Training_loss 31.50780


 12%|█▏        | 237/2000 [05:46<36:53,  1.26s/it]

Training_loss 31.42027


 12%|█▏        | 238/2000 [05:48<36:37,  1.25s/it]

Training_loss 31.30148


 12%|█▏        | 239/2000 [05:49<38:30,  1.31s/it]

Training_loss 31.20841


 12%|█▏        | 240/2000 [05:50<38:53,  1.33s/it]

Training_loss 31.12587


 12%|█▏        | 241/2000 [05:52<38:22,  1.31s/it]

Training_loss 31.02139


 12%|█▏        | 242/2000 [05:53<37:35,  1.28s/it]

Training_loss 30.95689


 12%|█▏        | 243/2000 [05:54<38:56,  1.33s/it]

Training_loss 30.88522


 12%|█▏        | 244/2000 [05:56<38:36,  1.32s/it]

Training_loss 30.79330


 12%|█▏        | 245/2000 [05:57<41:42,  1.43s/it]

Training_loss 30.70608


 12%|█▏        | 246/2000 [06:00<48:40,  1.66s/it]

Training_loss 30.65725


 12%|█▏        | 247/2000 [06:01<50:37,  1.73s/it]

Training_loss 30.56667


 12%|█▏        | 248/2000 [06:04<54:10,  1.86s/it]

Training_loss 30.46653


 12%|█▏        | 249/2000 [06:05<51:13,  1.76s/it]

Training_loss 30.40777


 12%|█▎        | 250/2000 [06:06<46:38,  1.60s/it]

Training_loss 30.30069


 13%|█▎        | 251/2000 [06:08<51:12,  1.76s/it]

Training_loss 30.17639


 13%|█▎        | 252/2000 [06:10<53:58,  1.85s/it]

Training_loss 30.06976


 13%|█▎        | 253/2000 [06:12<53:04,  1.82s/it]

Training_loss 29.94256


 13%|█▎        | 254/2000 [06:15<1:01:00,  2.10s/it]

Training_loss 29.84365


 13%|█▎        | 255/2000 [06:16<55:40,  1.91s/it]  

Training_loss 29.75261


 13%|█▎        | 256/2000 [06:19<1:00:12,  2.07s/it]

Training_loss 29.69490


 13%|█▎        | 257/2000 [06:20<55:02,  1.90s/it]  

Training_loss 29.55665


 13%|█▎        | 258/2000 [06:22<53:34,  1.85s/it]

Training_loss 29.50300


 13%|█▎        | 259/2000 [06:25<1:04:43,  2.23s/it]

Training_loss 29.42565


 13%|█▎        | 260/2000 [06:28<1:10:53,  2.44s/it]

Training_loss 29.35649


 13%|█▎        | 261/2000 [06:32<1:21:05,  2.80s/it]

Training_loss 29.33075


 13%|█▎        | 262/2000 [06:34<1:14:52,  2.58s/it]

Training_loss 29.24576


 13%|█▎        | 263/2000 [06:35<1:03:51,  2.21s/it]

Training_loss 29.19380


 13%|█▎        | 264/2000 [06:36<54:34,  1.89s/it]  

Training_loss 29.07148


 13%|█▎        | 265/2000 [06:37<47:39,  1.65s/it]

Training_loss 28.97681


 13%|█▎        | 266/2000 [06:39<43:22,  1.50s/it]

Training_loss 28.87321


 13%|█▎        | 267/2000 [06:40<41:36,  1.44s/it]

Training_loss 28.82137


 13%|█▎        | 268/2000 [06:41<42:31,  1.47s/it]

Training_loss 28.72936


 13%|█▎        | 269/2000 [06:44<51:08,  1.77s/it]

Training_loss 28.64951


 14%|█▎        | 270/2000 [06:45<48:41,  1.69s/it]

Training_loss 28.56772


 14%|█▎        | 271/2000 [06:47<44:09,  1.53s/it]

Training_loss 28.47497


 14%|█▎        | 272/2000 [06:48<41:18,  1.43s/it]

Training_loss 28.40709


 14%|█▎        | 273/2000 [06:49<38:09,  1.33s/it]

Training_loss 28.32213


 14%|█▎        | 274/2000 [06:50<34:45,  1.21s/it]

Training_loss 28.22172


 14%|█▍        | 275/2000 [06:51<32:32,  1.13s/it]

Training_loss 28.10048


 14%|█▍        | 276/2000 [06:52<31:22,  1.09s/it]

Training_loss 28.01198


 14%|█▍        | 277/2000 [06:53<30:04,  1.05s/it]

Training_loss 27.92650


 14%|█▍        | 278/2000 [06:54<29:56,  1.04s/it]

Training_loss 27.84237


 14%|█▍        | 279/2000 [06:55<30:03,  1.05s/it]

Training_loss 27.73168


 14%|█▍        | 280/2000 [06:56<30:51,  1.08s/it]

Training_loss 27.64090


 14%|█▍        | 281/2000 [06:57<31:53,  1.11s/it]

Training_loss 27.53444


 14%|█▍        | 282/2000 [06:59<41:05,  1.44s/it]

Training_loss 27.45000


 14%|█▍        | 283/2000 [07:01<40:11,  1.40s/it]

Training_loss 27.34742


 14%|█▍        | 284/2000 [07:02<38:41,  1.35s/it]

Training_loss 27.28370


 14%|█▍        | 285/2000 [07:04<48:29,  1.70s/it]

Training_loss 27.20992


 14%|█▍        | 286/2000 [07:06<51:32,  1.80s/it]

Training_loss 27.11135


 14%|█▍        | 287/2000 [07:08<52:00,  1.82s/it]

Training_loss 27.02218


 14%|█▍        | 288/2000 [07:10<52:01,  1.82s/it]

Training_loss 26.93245


 14%|█▍        | 289/2000 [07:12<53:56,  1.89s/it]

Training_loss 26.81154


 14%|█▍        | 290/2000 [07:15<1:06:01,  2.32s/it]

Training_loss 26.75891


 15%|█▍        | 291/2000 [07:18<1:05:01,  2.28s/it]

Training_loss 26.67473


 15%|█▍        | 292/2000 [07:19<56:52,  2.00s/it]  

Training_loss 26.57539


 15%|█▍        | 293/2000 [07:20<50:51,  1.79s/it]

Training_loss 26.50267


 15%|█▍        | 294/2000 [07:22<46:53,  1.65s/it]

Training_loss 26.40553


 15%|█▍        | 295/2000 [07:23<46:39,  1.64s/it]

Training_loss 26.32934


 15%|█▍        | 296/2000 [07:25<47:33,  1.67s/it]

Training_loss 26.20938


 15%|█▍        | 297/2000 [07:26<43:52,  1.55s/it]

Training_loss 26.11578


 15%|█▍        | 298/2000 [07:28<42:36,  1.50s/it]

Training_loss 25.99171


 15%|█▍        | 299/2000 [07:30<45:28,  1.60s/it]

Training_loss 25.90877


 15%|█▌        | 300/2000 [07:31<46:00,  1.62s/it]

Training_loss 25.82218


 15%|█▌        | 301/2000 [07:32<43:20,  1.53s/it]

Training_loss 25.73750


 15%|█▌        | 302/2000 [07:34<41:05,  1.45s/it]

Training_loss 25.60967


 15%|█▌        | 303/2000 [07:35<39:14,  1.39s/it]

Training_loss 25.53708


 15%|█▌        | 304/2000 [07:36<36:55,  1.31s/it]

Training_loss 25.43700


 15%|█▌        | 305/2000 [07:37<36:18,  1.29s/it]

Training_loss 25.31621


 15%|█▌        | 306/2000 [07:39<36:25,  1.29s/it]

Training_loss 25.23275


 15%|█▌        | 307/2000 [07:40<36:50,  1.31s/it]

Training_loss 25.16145


 15%|█▌        | 308/2000 [07:41<37:15,  1.32s/it]

Training_loss 25.08349


 15%|█▌        | 309/2000 [07:43<37:25,  1.33s/it]

Training_loss 25.02133


 16%|█▌        | 310/2000 [07:45<49:17,  1.75s/it]

Training_loss 24.94594


 16%|█▌        | 311/2000 [07:47<48:04,  1.71s/it]

Training_loss 24.87775


 16%|█▌        | 312/2000 [07:48<42:33,  1.51s/it]

Training_loss 24.73542


 16%|█▌        | 313/2000 [07:49<39:38,  1.41s/it]

Training_loss 24.67079


 16%|█▌        | 314/2000 [07:50<35:18,  1.26s/it]

Training_loss 24.57904


 16%|█▌        | 315/2000 [07:51<31:39,  1.13s/it]

Training_loss 24.50123


 16%|█▌        | 316/2000 [07:52<30:19,  1.08s/it]

Training_loss 24.45843


 16%|█▌        | 317/2000 [07:53<29:10,  1.04s/it]

Training_loss 24.39184


 16%|█▌        | 318/2000 [07:54<27:18,  1.03it/s]

Training_loss 24.31334


 16%|█▌        | 319/2000 [07:55<26:25,  1.06it/s]

Training_loss 24.22016


 16%|█▌        | 320/2000 [07:56<26:33,  1.05it/s]

Training_loss 24.16770


 16%|█▌        | 321/2000 [07:56<25:39,  1.09it/s]

Training_loss 24.10249


 16%|█▌        | 322/2000 [07:57<25:39,  1.09it/s]

Training_loss 23.99898


 16%|█▌        | 323/2000 [07:58<27:17,  1.02it/s]

Training_loss 23.91023


 16%|█▌        | 324/2000 [07:59<25:37,  1.09it/s]

Training_loss 23.87135


 16%|█▋        | 325/2000 [08:00<24:09,  1.16it/s]

Training_loss 23.77258


 16%|█▋        | 326/2000 [08:01<23:40,  1.18it/s]

Training_loss 23.66280


 16%|█▋        | 327/2000 [08:02<23:39,  1.18it/s]

Training_loss 23.53845


 16%|█▋        | 328/2000 [08:03<26:48,  1.04it/s]

Training_loss 23.43611


 16%|█▋        | 329/2000 [08:04<26:02,  1.07it/s]

Training_loss 23.34971


 16%|█▋        | 330/2000 [08:05<25:34,  1.09it/s]

Training_loss 23.27180


 17%|█▋        | 331/2000 [08:05<24:39,  1.13it/s]

Training_loss 23.19303


 17%|█▋        | 332/2000 [08:06<25:13,  1.10it/s]

Training_loss 23.08681


 17%|█▋        | 333/2000 [08:07<24:39,  1.13it/s]

Training_loss 23.05238


 17%|█▋        | 334/2000 [08:08<26:25,  1.05it/s]

Training_loss 22.95655


 17%|█▋        | 335/2000 [08:09<27:38,  1.00it/s]

Training_loss 22.84694


 17%|█▋        | 336/2000 [08:10<26:33,  1.04it/s]

Training_loss 22.72862


 17%|█▋        | 337/2000 [08:11<26:30,  1.05it/s]

Training_loss 22.67073


 17%|█▋        | 338/2000 [08:12<26:03,  1.06it/s]

Training_loss 22.63729


 17%|█▋        | 339/2000 [08:13<25:46,  1.07it/s]

Training_loss 22.51649


 17%|█▋        | 340/2000 [08:14<24:33,  1.13it/s]

Training_loss 22.48376


 17%|█▋        | 341/2000 [08:15<25:36,  1.08it/s]

Training_loss 22.37997


 17%|█▋        | 342/2000 [08:16<26:22,  1.05it/s]

Training_loss 22.34427


 17%|█▋        | 343/2000 [08:17<25:12,  1.10it/s]

Training_loss 22.22159


 17%|█▋        | 344/2000 [08:18<25:28,  1.08it/s]

Training_loss 22.14284


 17%|█▋        | 345/2000 [08:19<25:48,  1.07it/s]

Training_loss 22.07224


 17%|█▋        | 346/2000 [08:19<25:23,  1.09it/s]

Training_loss 22.00558


 17%|█▋        | 347/2000 [08:20<24:38,  1.12it/s]

Training_loss 21.95429


 17%|█▋        | 348/2000 [08:21<25:16,  1.09it/s]

Training_loss 21.88777


 17%|█▋        | 349/2000 [08:22<24:52,  1.11it/s]

Training_loss 21.78163


 18%|█▊        | 350/2000 [08:23<25:44,  1.07it/s]

Training_loss 21.71022


 18%|█▊        | 351/2000 [08:24<25:18,  1.09it/s]

Training_loss 21.64155


 18%|█▊        | 352/2000 [08:25<27:48,  1.01s/it]

Training_loss 21.54309


 18%|█▊        | 353/2000 [08:26<28:14,  1.03s/it]

Training_loss 21.50675


 18%|█▊        | 354/2000 [08:27<28:47,  1.05s/it]

Training_loss 21.40863


 18%|█▊        | 355/2000 [08:28<27:43,  1.01s/it]

Training_loss 21.37133


 18%|█▊        | 356/2000 [08:29<26:25,  1.04it/s]

Training_loss 21.30933


 18%|█▊        | 357/2000 [08:30<25:17,  1.08it/s]

Training_loss 21.23960


 18%|█▊        | 358/2000 [08:31<24:29,  1.12it/s]

Training_loss 21.15395


 18%|█▊        | 359/2000 [08:32<25:03,  1.09it/s]

Training_loss 21.04965


 18%|█▊        | 360/2000 [08:34<31:14,  1.14s/it]

Training_loss 20.98023


 18%|█▊        | 361/2000 [08:35<33:26,  1.22s/it]

Training_loss 20.90787


 18%|█▊        | 362/2000 [08:36<33:25,  1.22s/it]

Training_loss 20.82786


 18%|█▊        | 363/2000 [08:38<34:50,  1.28s/it]

Training_loss 20.75891


 18%|█▊        | 364/2000 [08:39<33:53,  1.24s/it]

Training_loss 20.71155


 18%|█▊        | 365/2000 [08:40<31:26,  1.15s/it]

Training_loss 20.68391


 18%|█▊        | 366/2000 [08:41<30:03,  1.10s/it]

Training_loss 20.64984


 18%|█▊        | 367/2000 [08:42<29:40,  1.09s/it]

Training_loss 20.60820


 18%|█▊        | 368/2000 [08:43<28:57,  1.06s/it]

Training_loss 20.53180


 18%|█▊        | 369/2000 [08:44<28:47,  1.06s/it]

Training_loss 20.45960


 18%|█▊        | 370/2000 [08:45<28:12,  1.04s/it]

Training_loss 20.37073


 19%|█▊        | 371/2000 [08:46<28:21,  1.04s/it]

Training_loss 20.31433


 19%|█▊        | 372/2000 [08:47<27:59,  1.03s/it]

Training_loss 20.25221


 19%|█▊        | 373/2000 [08:48<26:29,  1.02it/s]

Training_loss 20.17213


 19%|█▊        | 374/2000 [08:49<27:00,  1.00it/s]

Training_loss 20.10726


 19%|█▉        | 375/2000 [08:49<25:14,  1.07it/s]

Training_loss 20.03377


 19%|█▉        | 376/2000 [08:50<25:12,  1.07it/s]

Training_loss 19.93449


 19%|█▉        | 377/2000 [08:51<24:22,  1.11it/s]

Training_loss 19.89834


 19%|█▉        | 378/2000 [08:52<25:58,  1.04it/s]

Training_loss 19.87058


 19%|█▉        | 379/2000 [08:53<24:57,  1.08it/s]

Training_loss 19.80569


 19%|█▉        | 380/2000 [08:54<24:15,  1.11it/s]

Training_loss 19.75718


 19%|█▉        | 381/2000 [08:55<24:23,  1.11it/s]

Training_loss 19.70741


 19%|█▉        | 382/2000 [08:56<25:26,  1.06it/s]

Training_loss 19.64526


 19%|█▉        | 383/2000 [08:57<24:48,  1.09it/s]

Training_loss 19.62923


 19%|█▉        | 384/2000 [08:58<24:30,  1.10it/s]

Training_loss 19.54258


 19%|█▉        | 385/2000 [08:59<24:54,  1.08it/s]

Training_loss 19.47410


 19%|█▉        | 386/2000 [09:00<25:29,  1.06it/s]

Training_loss 19.39192


 19%|█▉        | 387/2000 [09:01<24:58,  1.08it/s]

Training_loss 19.35672


 19%|█▉        | 388/2000 [09:01<24:34,  1.09it/s]

Training_loss 19.29160


 19%|█▉        | 389/2000 [09:03<26:46,  1.00it/s]

Training_loss 19.27235


 20%|█▉        | 390/2000 [09:04<28:01,  1.04s/it]

Training_loss 19.18204


 20%|█▉        | 391/2000 [09:05<27:15,  1.02s/it]

Training_loss 19.13872


 20%|█▉        | 392/2000 [09:06<25:20,  1.06it/s]

Training_loss 19.09225


 20%|█▉        | 393/2000 [09:06<25:08,  1.07it/s]

Training_loss 19.01730


 20%|█▉        | 394/2000 [09:07<25:11,  1.06it/s]

Training_loss 18.95858


 20%|█▉        | 395/2000 [09:08<24:11,  1.11it/s]

Training_loss 18.87852


 20%|█▉        | 396/2000 [09:09<24:41,  1.08it/s]

Training_loss 18.80750


 20%|█▉        | 397/2000 [09:10<24:58,  1.07it/s]

Training_loss 18.76405


 20%|█▉        | 398/2000 [09:11<24:18,  1.10it/s]

Training_loss 18.73496


 20%|█▉        | 399/2000 [09:12<22:58,  1.16it/s]

Training_loss 18.68842


 20%|██        | 400/2000 [09:13<23:16,  1.15it/s]

Training_loss 18.71038


 20%|██        | 401/2000 [09:13<22:51,  1.17it/s]

Training_loss 18.66880


 20%|██        | 402/2000 [09:14<22:39,  1.18it/s]

Training_loss 18.63022


 20%|██        | 403/2000 [09:15<24:04,  1.11it/s]

Training_loss 18.58089


 20%|██        | 404/2000 [09:16<23:14,  1.14it/s]

Training_loss 18.51982


 20%|██        | 405/2000 [09:17<24:23,  1.09it/s]

Training_loss 18.47412


 20%|██        | 406/2000 [09:18<26:57,  1.01s/it]

Training_loss 18.42335


 20%|██        | 407/2000 [09:19<25:12,  1.05it/s]

Training_loss 18.39143


 20%|██        | 408/2000 [09:20<24:24,  1.09it/s]

Training_loss 18.35369


 20%|██        | 409/2000 [09:21<24:50,  1.07it/s]

Training_loss 18.31599


 20%|██        | 410/2000 [09:22<23:50,  1.11it/s]

Training_loss 18.27395


 21%|██        | 411/2000 [09:23<23:55,  1.11it/s]

Training_loss 18.21127


 21%|██        | 412/2000 [09:24<23:54,  1.11it/s]

Training_loss 18.16459


 21%|██        | 413/2000 [09:24<23:11,  1.14it/s]

Training_loss 18.16749


 21%|██        | 414/2000 [09:25<22:41,  1.16it/s]

Training_loss 18.10732


 21%|██        | 415/2000 [09:26<25:10,  1.05it/s]

Training_loss 18.04961


 21%|██        | 416/2000 [09:27<24:03,  1.10it/s]

Training_loss 17.97958


 21%|██        | 417/2000 [09:28<23:41,  1.11it/s]

Training_loss 17.97768


 21%|██        | 418/2000 [09:29<24:56,  1.06it/s]

Training_loss 17.95366


 21%|██        | 419/2000 [09:30<24:54,  1.06it/s]

Training_loss 17.91055


 21%|██        | 420/2000 [09:31<24:56,  1.06it/s]

Training_loss 17.87428


 21%|██        | 421/2000 [09:32<25:14,  1.04it/s]

Training_loss 17.85532


 21%|██        | 422/2000 [09:33<27:18,  1.04s/it]

Training_loss 17.80023


 21%|██        | 423/2000 [09:34<28:15,  1.08s/it]

Training_loss 17.75301


 21%|██        | 424/2000 [09:35<27:46,  1.06s/it]

Training_loss 17.75095


 21%|██▏       | 425/2000 [09:36<25:59,  1.01it/s]

Training_loss 17.70496


 21%|██▏       | 426/2000 [09:37<26:05,  1.01it/s]

Training_loss 17.67461


 21%|██▏       | 427/2000 [09:38<24:19,  1.08it/s]

Training_loss 17.63178


 21%|██▏       | 428/2000 [09:39<25:01,  1.05it/s]

Training_loss 17.56629


 21%|██▏       | 429/2000 [09:40<26:54,  1.03s/it]

Training_loss 17.53321


 22%|██▏       | 430/2000 [09:42<30:38,  1.17s/it]

Training_loss 17.49119


 22%|██▏       | 431/2000 [09:43<32:21,  1.24s/it]

Training_loss 17.45934


 22%|██▏       | 432/2000 [09:45<36:39,  1.40s/it]

Training_loss 17.43085


 22%|██▏       | 433/2000 [09:47<38:35,  1.48s/it]

Training_loss 17.39386


 22%|██▏       | 434/2000 [09:48<37:49,  1.45s/it]

Training_loss 17.37658


 22%|██▏       | 435/2000 [09:49<35:41,  1.37s/it]

Training_loss 17.29541


 22%|██▏       | 436/2000 [09:50<34:47,  1.33s/it]

Training_loss 17.25032


 22%|██▏       | 437/2000 [09:52<33:54,  1.30s/it]

Training_loss 17.22341


 22%|██▏       | 438/2000 [09:53<33:09,  1.27s/it]

Training_loss 17.20749


 22%|██▏       | 439/2000 [09:54<34:47,  1.34s/it]

Training_loss 17.18981


 22%|██▏       | 440/2000 [09:56<35:49,  1.38s/it]

Training_loss 17.17494


 22%|██▏       | 441/2000 [09:57<36:21,  1.40s/it]

Training_loss 17.13665


 22%|██▏       | 442/2000 [10:00<49:12,  1.90s/it]

Training_loss 17.09743


 22%|██▏       | 443/2000 [10:02<49:34,  1.91s/it]

Training_loss 17.10046


 22%|██▏       | 444/2000 [10:04<47:46,  1.84s/it]

Training_loss 17.10719


 22%|██▏       | 445/2000 [10:06<45:36,  1.76s/it]

Training_loss 17.06714


 22%|██▏       | 446/2000 [10:07<43:32,  1.68s/it]

Training_loss 17.04578


 22%|██▏       | 447/2000 [10:09<43:47,  1.69s/it]

Training_loss 17.01934


 22%|██▏       | 448/2000 [10:11<50:45,  1.96s/it]

Training_loss 16.96870


 22%|██▏       | 449/2000 [10:14<52:43,  2.04s/it]

Training_loss 16.93001


 22%|██▎       | 450/2000 [10:15<47:08,  1.83s/it]

Training_loss 16.90598


 23%|██▎       | 451/2000 [10:16<45:07,  1.75s/it]

Training_loss 16.87485


 23%|██▎       | 452/2000 [10:19<50:55,  1.97s/it]

Training_loss 16.82472


 23%|██▎       | 453/2000 [10:20<46:31,  1.80s/it]

Training_loss 16.78308


 23%|██▎       | 454/2000 [10:22<43:15,  1.68s/it]

Training_loss 16.73808


 23%|██▎       | 455/2000 [10:23<39:21,  1.53s/it]

Training_loss 16.71588


 23%|██▎       | 456/2000 [10:24<35:58,  1.40s/it]

Training_loss 16.67692


 23%|██▎       | 457/2000 [10:25<35:41,  1.39s/it]

Training_loss 16.63261


 23%|██▎       | 458/2000 [10:28<43:47,  1.70s/it]

Training_loss 16.63299


 23%|██▎       | 459/2000 [10:31<52:23,  2.04s/it]

Training_loss 16.60806


 23%|██▎       | 460/2000 [10:33<58:11,  2.27s/it]

Training_loss 16.59342


 23%|██▎       | 461/2000 [10:35<55:31,  2.16s/it]

Training_loss 16.59407


 23%|██▎       | 462/2000 [10:37<52:31,  2.05s/it]

Training_loss 16.59779


 23%|██▎       | 463/2000 [10:39<54:32,  2.13s/it]

Training_loss 16.57930


 23%|██▎       | 464/2000 [10:41<50:15,  1.96s/it]

Training_loss 16.55893


 23%|██▎       | 465/2000 [10:43<46:25,  1.81s/it]

Training_loss 16.52212


 23%|██▎       | 466/2000 [10:44<41:43,  1.63s/it]

Training_loss 16.48985


 23%|██▎       | 467/2000 [10:45<38:37,  1.51s/it]

Training_loss 16.46932


 23%|██▎       | 468/2000 [10:46<36:29,  1.43s/it]

Training_loss 16.45159


 23%|██▎       | 469/2000 [10:48<35:39,  1.40s/it]

Training_loss 16.43549


 24%|██▎       | 470/2000 [10:49<34:16,  1.34s/it]

Training_loss 16.40260


 24%|██▎       | 471/2000 [10:50<33:46,  1.33s/it]

Training_loss 16.37455


 24%|██▎       | 472/2000 [10:51<33:01,  1.30s/it]

Training_loss 16.34949


 24%|██▎       | 473/2000 [10:53<32:54,  1.29s/it]

Training_loss 16.34782


 24%|██▎       | 474/2000 [10:54<32:05,  1.26s/it]

Training_loss 16.33601


 24%|██▍       | 475/2000 [10:55<31:28,  1.24s/it]

Training_loss 16.28119


 24%|██▍       | 476/2000 [10:56<30:59,  1.22s/it]

Training_loss 16.23164


 24%|██▍       | 477/2000 [10:59<40:27,  1.59s/it]

Training_loss 16.21741


 24%|██▍       | 478/2000 [11:02<52:01,  2.05s/it]

Training_loss 16.20672


 24%|██▍       | 479/2000 [11:04<52:37,  2.08s/it]

Training_loss 16.18378


 24%|██▍       | 480/2000 [11:08<1:06:11,  2.61s/it]

Training_loss 16.15083


 24%|██▍       | 481/2000 [11:11<1:13:15,  2.89s/it]

Training_loss 16.14521


 24%|██▍       | 482/2000 [11:14<1:11:08,  2.81s/it]

Training_loss 16.13499


 24%|██▍       | 483/2000 [11:16<1:09:17,  2.74s/it]

Training_loss 16.14115


 24%|██▍       | 484/2000 [11:20<1:13:35,  2.91s/it]

Training_loss 16.09985


 24%|██▍       | 485/2000 [11:22<1:07:10,  2.66s/it]

Training_loss 16.09270


 24%|██▍       | 486/2000 [11:23<57:59,  2.30s/it]  

Training_loss 16.08817


 24%|██▍       | 487/2000 [11:25<50:20,  2.00s/it]

Training_loss 16.08640


 24%|██▍       | 488/2000 [11:27<51:08,  2.03s/it]

Training_loss 16.07054


 24%|██▍       | 489/2000 [11:28<45:00,  1.79s/it]

Training_loss 16.07301


 24%|██▍       | 490/2000 [11:29<39:32,  1.57s/it]

Training_loss 16.05723


 25%|██▍       | 491/2000 [11:30<37:42,  1.50s/it]

Training_loss 16.04718


 25%|██▍       | 492/2000 [11:31<35:09,  1.40s/it]

Training_loss 16.07142


 25%|██▍       | 493/2000 [11:33<33:04,  1.32s/it]

Training_loss 16.07688


 25%|██▍       | 494/2000 [11:34<31:31,  1.26s/it]

Training_loss 16.05765


 25%|██▍       | 495/2000 [11:35<29:07,  1.16s/it]

Training_loss 16.04211


 25%|██▍       | 496/2000 [11:36<29:11,  1.16s/it]

Training_loss 16.01804


 25%|██▍       | 497/2000 [11:37<28:25,  1.13s/it]

Training_loss 16.00808


 25%|██▍       | 498/2000 [11:38<27:59,  1.12s/it]

Training_loss 16.01148


 25%|██▍       | 499/2000 [11:39<27:17,  1.09s/it]

Training_loss 16.00071


 25%|██▌       | 500/2000 [11:40<26:43,  1.07s/it]

Training_loss 16.00391


 25%|██▌       | 501/2000 [11:41<26:56,  1.08s/it]

Training_loss 16.01781


 25%|██▌       | 502/2000 [11:42<26:55,  1.08s/it]

Training_loss 16.03239


 25%|██▌       | 503/2000 [11:43<28:34,  1.15s/it]

Training_loss 16.01383


 25%|██▌       | 504/2000 [11:45<32:24,  1.30s/it]

Training_loss 15.99386


 25%|██▌       | 505/2000 [11:47<37:28,  1.50s/it]

Training_loss 15.99000


 25%|██▌       | 506/2000 [11:49<39:23,  1.58s/it]

Training_loss 16.00188


 25%|██▌       | 507/2000 [11:50<38:16,  1.54s/it]

Training_loss 16.03259


 25%|██▌       | 508/2000 [11:52<38:55,  1.57s/it]

Training_loss 16.01169


 25%|██▌       | 509/2000 [11:53<36:52,  1.48s/it]

Training_loss 16.01675


 26%|██▌       | 510/2000 [11:55<36:23,  1.47s/it]

Training_loss 15.99206


 26%|██▌       | 511/2000 [11:56<34:43,  1.40s/it]

Training_loss 15.98687


 26%|██▌       | 512/2000 [11:57<33:41,  1.36s/it]

Training_loss 15.97182


 26%|██▌       | 513/2000 [11:59<37:05,  1.50s/it]

Training_loss 15.97747


 26%|██▌       | 514/2000 [12:01<39:53,  1.61s/it]

Training_loss 15.96583


 26%|██▌       | 515/2000 [12:03<42:24,  1.71s/it]

Training_loss 15.98193


 26%|██▌       | 516/2000 [12:04<40:02,  1.62s/it]

Training_loss 15.96338


 26%|██▌       | 517/2000 [12:06<42:18,  1.71s/it]

Training_loss 15.92115


 26%|██▌       | 518/2000 [12:08<42:50,  1.73s/it]

Training_loss 15.93031


 26%|██▌       | 519/2000 [12:09<40:46,  1.65s/it]

Training_loss 15.91020


 26%|██▌       | 520/2000 [12:11<39:39,  1.61s/it]

Training_loss 15.89351


 26%|██▌       | 521/2000 [12:12<39:22,  1.60s/it]

Training_loss 15.91029


 26%|██▌       | 522/2000 [12:14<38:53,  1.58s/it]

Training_loss 15.91528


 26%|██▌       | 523/2000 [12:15<36:46,  1.49s/it]

Training_loss 15.92348


 26%|██▌       | 524/2000 [12:17<35:23,  1.44s/it]

Training_loss 15.92658


 26%|██▋       | 525/2000 [12:18<36:06,  1.47s/it]

Training_loss 15.89603


 26%|██▋       | 526/2000 [12:19<35:08,  1.43s/it]

Training_loss 15.87442


 26%|██▋       | 527/2000 [12:21<33:34,  1.37s/it]

Training_loss 15.85395


 26%|██▋       | 528/2000 [12:22<33:07,  1.35s/it]

Training_loss 15.84616


 26%|██▋       | 529/2000 [12:23<32:30,  1.33s/it]

Training_loss 15.85689


 26%|██▋       | 530/2000 [12:25<31:43,  1.29s/it]

Training_loss 15.87169


 27%|██▋       | 531/2000 [12:26<31:07,  1.27s/it]

Training_loss 15.86592


 27%|██▋       | 532/2000 [12:27<30:36,  1.25s/it]

Training_loss 15.88231


 27%|██▋       | 533/2000 [12:28<29:39,  1.21s/it]

Training_loss 15.88108


 27%|██▋       | 534/2000 [12:29<29:47,  1.22s/it]

Training_loss 15.84553


 27%|██▋       | 535/2000 [12:31<30:17,  1.24s/it]

Training_loss 15.84003


 27%|██▋       | 536/2000 [12:32<30:19,  1.24s/it]

Training_loss 15.84848


 27%|██▋       | 537/2000 [12:33<31:15,  1.28s/it]

Training_loss 15.83335


 27%|██▋       | 538/2000 [12:35<34:15,  1.41s/it]

Training_loss 15.85651


 27%|██▋       | 539/2000 [12:36<34:16,  1.41s/it]

Training_loss 15.83154


 27%|██▋       | 540/2000 [12:38<34:12,  1.41s/it]

Training_loss 15.82405


 27%|██▋       | 541/2000 [12:39<33:22,  1.37s/it]

Training_loss 15.84705


 27%|██▋       | 542/2000 [12:41<35:23,  1.46s/it]

Training_loss 15.83337


 27%|██▋       | 543/2000 [12:43<43:25,  1.79s/it]

Training_loss 15.82341


 27%|██▋       | 544/2000 [12:45<40:10,  1.66s/it]

Training_loss 15.80835


 27%|██▋       | 545/2000 [12:46<40:37,  1.67s/it]

Training_loss 15.79679


 27%|██▋       | 546/2000 [12:48<37:47,  1.56s/it]

Training_loss 15.77798


 27%|██▋       | 547/2000 [12:49<36:49,  1.52s/it]

Training_loss 15.77943


 27%|██▋       | 548/2000 [12:51<38:57,  1.61s/it]

Training_loss 15.78617


 27%|██▋       | 549/2000 [12:52<35:53,  1.48s/it]

Training_loss 15.78021


 28%|██▊       | 550/2000 [12:53<33:52,  1.40s/it]

Training_loss 15.76068


 28%|██▊       | 551/2000 [12:54<31:46,  1.32s/it]

Training_loss 15.76187


 28%|██▊       | 552/2000 [12:55<30:36,  1.27s/it]

Training_loss 15.74946


 28%|██▊       | 553/2000 [12:57<29:42,  1.23s/it]

Training_loss 15.73988


 28%|██▊       | 554/2000 [12:58<28:50,  1.20s/it]

Training_loss 15.74076


 28%|██▊       | 555/2000 [12:59<29:12,  1.21s/it]

Training_loss 15.74903


 28%|██▊       | 556/2000 [13:00<29:09,  1.21s/it]

Training_loss 15.73053


 28%|██▊       | 557/2000 [13:01<28:55,  1.20s/it]

Training_loss 15.74177


 28%|██▊       | 558/2000 [13:03<28:31,  1.19s/it]

Training_loss 15.77329


 28%|██▊       | 559/2000 [13:04<28:13,  1.18s/it]

Training_loss 15.75646


 28%|██▊       | 560/2000 [13:05<28:37,  1.19s/it]

Training_loss 15.75408


 28%|██▊       | 561/2000 [13:06<31:10,  1.30s/it]

Training_loss 15.75001


 28%|██▊       | 562/2000 [13:08<31:17,  1.31s/it]

Training_loss 15.70924


 28%|██▊       | 563/2000 [13:09<31:22,  1.31s/it]

Training_loss 15.73418


 28%|██▊       | 564/2000 [13:10<31:07,  1.30s/it]

Training_loss 15.73071


 28%|██▊       | 565/2000 [13:11<29:36,  1.24s/it]

Training_loss 15.72187


 28%|██▊       | 566/2000 [13:13<29:23,  1.23s/it]

Training_loss 15.73304


 28%|██▊       | 567/2000 [13:14<28:51,  1.21s/it]

Training_loss 15.71503


 28%|██▊       | 568/2000 [13:15<28:17,  1.19s/it]

Training_loss 15.70565


 28%|██▊       | 569/2000 [13:16<28:08,  1.18s/it]

Training_loss 15.68824


 28%|██▊       | 570/2000 [13:17<28:07,  1.18s/it]

Training_loss 15.68705


 29%|██▊       | 571/2000 [13:18<27:40,  1.16s/it]

Training_loss 15.67709


 29%|██▊       | 572/2000 [13:20<27:27,  1.15s/it]

Training_loss 15.67019


 29%|██▊       | 573/2000 [13:21<27:06,  1.14s/it]

Training_loss 15.68005


 29%|██▊       | 574/2000 [13:22<28:58,  1.22s/it]

Training_loss 15.68783


 29%|██▉       | 575/2000 [13:23<29:00,  1.22s/it]

Training_loss 15.70718


 29%|██▉       | 576/2000 [13:25<28:55,  1.22s/it]

Training_loss 15.70579


 29%|██▉       | 577/2000 [13:26<28:48,  1.21s/it]

Training_loss 15.69290


 29%|██▉       | 578/2000 [13:27<28:55,  1.22s/it]

Training_loss 15.69737


 29%|██▉       | 579/2000 [13:28<28:34,  1.21s/it]

Training_loss 15.67413


 29%|██▉       | 580/2000 [13:29<28:34,  1.21s/it]

Training_loss 15.65546


 29%|██▉       | 581/2000 [13:30<28:00,  1.18s/it]

Training_loss 15.65852


 29%|██▉       | 582/2000 [13:32<27:53,  1.18s/it]

Training_loss 15.64453


 29%|██▉       | 583/2000 [13:33<28:15,  1.20s/it]

Training_loss 15.62838


 29%|██▉       | 584/2000 [13:34<28:39,  1.21s/it]

Training_loss 15.63912


 29%|██▉       | 585/2000 [13:36<30:49,  1.31s/it]

Training_loss 15.64980


 29%|██▉       | 586/2000 [13:38<34:38,  1.47s/it]

Training_loss 15.66767


 29%|██▉       | 587/2000 [13:40<40:41,  1.73s/it]

Training_loss 15.66862


 29%|██▉       | 588/2000 [13:42<44:01,  1.87s/it]

Training_loss 15.66496


 29%|██▉       | 589/2000 [13:45<53:58,  2.29s/it]

Training_loss 15.67046


 30%|██▉       | 590/2000 [13:48<53:17,  2.27s/it]

Training_loss 15.67038


 30%|██▉       | 591/2000 [13:50<52:33,  2.24s/it]

Training_loss 15.69653


 30%|██▉       | 592/2000 [13:53<58:33,  2.50s/it]

Training_loss 15.69382


 30%|██▉       | 593/2000 [13:55<59:44,  2.55s/it]

Training_loss 15.70175


 30%|██▉       | 594/2000 [13:57<50:43,  2.16s/it]

Training_loss 15.70513


 30%|██▉       | 595/2000 [13:58<46:54,  2.00s/it]

Training_loss 15.70623


 30%|██▉       | 596/2000 [14:00<46:03,  1.97s/it]

Training_loss 15.72095


 30%|██▉       | 597/2000 [14:02<41:24,  1.77s/it]

Training_loss 15.70020


 30%|██▉       | 598/2000 [14:03<38:14,  1.64s/it]

Training_loss 15.68421


 30%|██▉       | 599/2000 [14:04<34:54,  1.50s/it]

Training_loss 15.65577


 30%|███       | 600/2000 [14:05<32:17,  1.38s/it]

Training_loss 15.63915


 30%|███       | 601/2000 [14:06<28:25,  1.22s/it]

Training_loss 15.63140


 30%|███       | 602/2000 [14:07<26:03,  1.12s/it]

Training_loss 15.63908


 30%|███       | 603/2000 [14:09<30:24,  1.31s/it]

Training_loss 15.64929


 30%|███       | 604/2000 [14:10<32:51,  1.41s/it]

Training_loss 15.63525


 30%|███       | 605/2000 [14:12<35:01,  1.51s/it]

Training_loss 15.64085


 30%|███       | 606/2000 [14:14<37:11,  1.60s/it]

Training_loss 15.65967


 30%|███       | 607/2000 [14:16<43:03,  1.85s/it]

Training_loss 15.69870


 30%|███       | 608/2000 [14:19<46:51,  2.02s/it]

Training_loss 15.68578


 30%|███       | 609/2000 [14:20<42:23,  1.83s/it]

Training_loss 15.68726


 30%|███       | 610/2000 [14:21<39:22,  1.70s/it]

Training_loss 15.67691


 31%|███       | 611/2000 [14:23<39:38,  1.71s/it]

Training_loss 15.67669


 31%|███       | 612/2000 [14:25<43:03,  1.86s/it]

Training_loss 15.66523


 31%|███       | 613/2000 [14:27<41:12,  1.78s/it]

Training_loss 15.64220


 31%|███       | 614/2000 [14:28<38:52,  1.68s/it]

Training_loss 15.64080


 31%|███       | 615/2000 [14:30<35:48,  1.55s/it]

Training_loss 15.65308


 31%|███       | 616/2000 [14:31<33:06,  1.44s/it]

Training_loss 15.64161


 31%|███       | 617/2000 [14:32<31:27,  1.36s/it]

Training_loss 15.64269


 31%|███       | 618/2000 [14:33<30:11,  1.31s/it]

Training_loss 15.65610


 31%|███       | 619/2000 [14:34<28:51,  1.25s/it]

Training_loss 15.66316


 31%|███       | 620/2000 [14:36<28:21,  1.23s/it]

Training_loss 15.68088


 31%|███       | 621/2000 [14:37<27:58,  1.22s/it]

Training_loss 15.66229


 31%|███       | 622/2000 [14:38<28:06,  1.22s/it]

Training_loss 15.66847


 31%|███       | 623/2000 [14:39<28:18,  1.23s/it]

Training_loss 15.68220


 31%|███       | 624/2000 [14:40<27:51,  1.21s/it]

Training_loss 15.69713


 31%|███▏      | 625/2000 [14:42<28:28,  1.24s/it]

Training_loss 15.72090


 31%|███▏      | 626/2000 [14:43<28:47,  1.26s/it]

Training_loss 15.72992


 31%|███▏      | 627/2000 [14:44<28:53,  1.26s/it]

Training_loss 15.73820


 31%|███▏      | 628/2000 [14:46<28:26,  1.24s/it]

Training_loss 15.71751


 31%|███▏      | 629/2000 [14:47<28:23,  1.24s/it]

Training_loss 15.72719


 32%|███▏      | 630/2000 [14:48<27:43,  1.21s/it]

Training_loss 15.74538


 32%|███▏      | 631/2000 [14:49<27:17,  1.20s/it]

Training_loss 15.79174


 32%|███▏      | 632/2000 [14:50<27:11,  1.19s/it]

Training_loss 15.80104


 32%|███▏      | 633/2000 [14:51<26:32,  1.16s/it]

Training_loss 15.78398


 32%|███▏      | 634/2000 [14:53<26:39,  1.17s/it]

Training_loss 15.79981


 32%|███▏      | 635/2000 [14:54<26:43,  1.17s/it]

Training_loss 15.80664


 32%|███▏      | 636/2000 [14:55<26:31,  1.17s/it]

Training_loss 15.80424


 32%|███▏      | 637/2000 [14:56<26:37,  1.17s/it]

Training_loss 15.80797


 32%|███▏      | 638/2000 [14:57<27:15,  1.20s/it]

Training_loss 15.80922


 32%|███▏      | 639/2000 [14:59<29:19,  1.29s/it]

Training_loss 15.81863


 32%|███▏      | 640/2000 [15:00<29:07,  1.28s/it]

Training_loss 15.84047


 32%|███▏      | 641/2000 [15:01<28:45,  1.27s/it]

Training_loss 15.82574


 32%|███▏      | 642/2000 [15:02<27:29,  1.21s/it]

Training_loss 15.83844


 32%|███▏      | 643/2000 [15:04<27:17,  1.21s/it]

Training_loss 15.84253


 32%|███▏      | 644/2000 [15:05<27:11,  1.20s/it]

Training_loss 15.86234


 32%|███▏      | 645/2000 [15:06<27:26,  1.22s/it]

Training_loss 15.87658


 32%|███▏      | 646/2000 [15:07<29:04,  1.29s/it]

Training_loss 15.89316


 32%|███▏      | 647/2000 [15:09<30:13,  1.34s/it]

Training_loss 15.89357


 32%|███▏      | 648/2000 [15:11<32:05,  1.42s/it]

Training_loss 15.90456


 32%|███▏      | 649/2000 [15:14<47:44,  2.12s/it]

Training_loss 15.92382


 32%|███▎      | 650/2000 [15:18<56:12,  2.50s/it]

Training_loss 15.91943


 33%|███▎      | 651/2000 [15:20<53:18,  2.37s/it]

Training_loss 15.94183


 33%|███▎      | 652/2000 [15:23<56:31,  2.52s/it]

Training_loss 15.95980


 33%|███▎      | 653/2000 [15:24<51:21,  2.29s/it]

Training_loss 15.95200


 33%|███▎      | 654/2000 [15:26<47:35,  2.12s/it]

Training_loss 15.96220


 33%|███▎      | 655/2000 [15:27<42:28,  1.90s/it]

Training_loss 15.98023


 33%|███▎      | 656/2000 [15:29<37:34,  1.68s/it]

Training_loss 15.97323


 33%|███▎      | 657/2000 [15:30<37:01,  1.65s/it]

Training_loss 15.99480


 33%|███▎      | 658/2000 [15:32<36:25,  1.63s/it]

Training_loss 16.00777


 33%|███▎      | 659/2000 [15:33<34:23,  1.54s/it]

Training_loss 16.01211


 33%|███▎      | 660/2000 [15:35<38:20,  1.72s/it]

Training_loss 16.02129


 33%|███▎      | 661/2000 [15:37<38:45,  1.74s/it]

Training_loss 15.99649


 33%|███▎      | 662/2000 [15:39<38:29,  1.73s/it]

Training_loss 15.97756


 33%|███▎      | 663/2000 [15:41<38:40,  1.74s/it]

Training_loss 15.96721


 33%|███▎      | 664/2000 [15:42<37:04,  1.67s/it]

Training_loss 15.95290


 33%|███▎      | 665/2000 [15:44<36:09,  1.63s/it]

Training_loss 15.97476


 33%|███▎      | 666/2000 [15:46<38:49,  1.75s/it]

Training_loss 15.96823


 33%|███▎      | 667/2000 [15:47<38:10,  1.72s/it]

Training_loss 15.99185


 33%|███▎      | 668/2000 [15:49<37:30,  1.69s/it]

Training_loss 16.01284


 33%|███▎      | 669/2000 [15:51<41:11,  1.86s/it]

Training_loss 16.01590


 34%|███▎      | 670/2000 [15:54<45:12,  2.04s/it]

Training_loss 16.01934


 34%|███▎      | 671/2000 [15:55<43:27,  1.96s/it]

Training_loss 16.04604


 34%|███▎      | 672/2000 [15:59<51:40,  2.33s/it]

Training_loss 16.04729


 34%|███▎      | 673/2000 [16:00<47:46,  2.16s/it]

Training_loss 16.06670


 34%|███▎      | 674/2000 [16:02<43:39,  1.98s/it]

Training_loss 16.06708


 34%|███▍      | 675/2000 [16:04<47:02,  2.13s/it]

Training_loss 16.07499


 34%|███▍      | 676/2000 [16:06<43:32,  1.97s/it]

Training_loss 16.08972


 34%|███▍      | 677/2000 [16:07<40:05,  1.82s/it]

Training_loss 16.09505


 34%|███▍      | 678/2000 [16:09<36:51,  1.67s/it]

Training_loss 16.08468


 34%|███▍      | 679/2000 [16:10<35:29,  1.61s/it]

Training_loss 16.08787


 34%|███▍      | 680/2000 [16:12<34:09,  1.55s/it]

Training_loss 16.09535


 34%|███▍      | 681/2000 [16:14<42:44,  1.94s/it]

Training_loss 16.11663


 34%|███▍      | 682/2000 [16:19<57:46,  2.63s/it]

Training_loss 16.10482


 34%|███▍      | 683/2000 [16:21<54:33,  2.49s/it]

Training_loss 16.08514


 34%|███▍      | 684/2000 [16:22<46:39,  2.13s/it]

Training_loss 16.08294


 34%|███▍      | 685/2000 [16:24<42:01,  1.92s/it]

Training_loss 16.10043


 34%|███▍      | 686/2000 [16:25<39:16,  1.79s/it]

Training_loss 16.07542


 34%|███▍      | 687/2000 [16:27<37:21,  1.71s/it]

Training_loss 16.07913


 34%|███▍      | 688/2000 [16:28<36:42,  1.68s/it]

Training_loss 16.09639


 34%|███▍      | 689/2000 [16:30<35:23,  1.62s/it]

Training_loss 16.11547


 34%|███▍      | 690/2000 [16:31<33:26,  1.53s/it]

Training_loss 16.12629


 35%|███▍      | 691/2000 [16:33<33:20,  1.53s/it]

Training_loss 16.13885


 35%|███▍      | 692/2000 [16:34<35:00,  1.61s/it]

Training_loss 16.15559


 35%|███▍      | 693/2000 [16:36<35:08,  1.61s/it]

Training_loss 16.18852


 35%|███▍      | 694/2000 [16:37<34:27,  1.58s/it]

Training_loss 16.19569


 35%|███▍      | 695/2000 [16:39<33:29,  1.54s/it]

Training_loss 16.16340


 35%|███▍      | 696/2000 [16:40<31:15,  1.44s/it]

Training_loss 16.16581


 35%|███▍      | 697/2000 [16:42<32:04,  1.48s/it]

Training_loss 16.17823


 35%|███▍      | 698/2000 [16:44<34:36,  1.59s/it]

Training_loss 16.16501


 35%|███▍      | 699/2000 [16:46<43:05,  1.99s/it]

Training_loss 16.16578


 35%|███▌      | 700/2000 [16:48<41:56,  1.94s/it]

Training_loss 16.17720


 35%|███▌      | 701/2000 [16:50<38:32,  1.78s/it]

Training_loss 16.17995


 35%|███▌      | 702/2000 [16:51<35:16,  1.63s/it]

Training_loss 16.20657


 35%|███▌      | 703/2000 [16:52<32:35,  1.51s/it]

Training_loss 16.22126


 35%|███▌      | 704/2000 [16:53<29:58,  1.39s/it]

Training_loss 16.25228


 35%|███▌      | 705/2000 [16:55<29:19,  1.36s/it]

Training_loss 16.26318


 35%|███▌      | 706/2000 [16:56<32:06,  1.49s/it]

Training_loss 16.27673


 35%|███▌      | 707/2000 [16:58<30:37,  1.42s/it]

Training_loss 16.27245


 35%|███▌      | 708/2000 [16:59<30:05,  1.40s/it]

Training_loss 16.27301


 35%|███▌      | 709/2000 [17:00<29:16,  1.36s/it]

Training_loss 16.27921


 36%|███▌      | 710/2000 [17:01<28:02,  1.30s/it]

Training_loss 16.29709


 36%|███▌      | 711/2000 [17:03<27:09,  1.26s/it]

Training_loss 16.31832


 36%|███▌      | 712/2000 [17:04<27:24,  1.28s/it]

Training_loss 16.30910


 36%|███▌      | 713/2000 [17:05<27:37,  1.29s/it]

Training_loss 16.30330


 36%|███▌      | 714/2000 [17:07<29:41,  1.38s/it]

Training_loss 16.31011


 36%|███▌      | 715/2000 [17:10<38:25,  1.79s/it]

Training_loss 16.30722


 36%|███▌      | 716/2000 [17:11<35:22,  1.65s/it]

Training_loss 16.29879


 36%|███▌      | 717/2000 [17:12<33:00,  1.54s/it]

Training_loss 16.29096


 36%|███▌      | 718/2000 [17:14<31:59,  1.50s/it]

Training_loss 16.29545


 36%|███▌      | 719/2000 [17:15<30:21,  1.42s/it]

Training_loss 16.29432


 36%|███▌      | 720/2000 [17:16<28:51,  1.35s/it]

Training_loss 16.27323


 36%|███▌      | 721/2000 [17:17<27:41,  1.30s/it]

Training_loss 16.27378


 36%|███▌      | 722/2000 [17:18<26:56,  1.26s/it]

Training_loss 16.28622


 36%|███▌      | 723/2000 [17:20<26:47,  1.26s/it]

Training_loss 16.29182


 36%|███▌      | 724/2000 [17:21<26:08,  1.23s/it]

Training_loss 16.29397


 36%|███▋      | 725/2000 [17:22<27:14,  1.28s/it]

Training_loss 16.29222


 36%|███▋      | 726/2000 [17:23<26:53,  1.27s/it]

Training_loss 16.31511


 36%|███▋      | 727/2000 [17:25<28:08,  1.33s/it]

Training_loss 16.32029


 36%|███▋      | 728/2000 [17:27<30:47,  1.45s/it]

Training_loss 16.29848


 36%|███▋      | 729/2000 [17:28<33:02,  1.56s/it]

Training_loss 16.29740


 36%|███▋      | 730/2000 [17:30<33:30,  1.58s/it]

Training_loss 16.27251


 37%|███▋      | 731/2000 [17:31<31:30,  1.49s/it]

Training_loss 16.26302


 37%|███▋      | 732/2000 [17:33<30:57,  1.46s/it]

Training_loss 16.23671


 37%|███▋      | 733/2000 [17:35<33:29,  1.59s/it]

Training_loss 16.24290


 37%|███▋      | 734/2000 [17:36<33:53,  1.61s/it]

Training_loss 16.22473


 37%|███▋      | 735/2000 [17:38<33:43,  1.60s/it]

Training_loss 16.22861


 37%|███▋      | 736/2000 [17:39<33:53,  1.61s/it]

Training_loss 16.23875


 37%|███▋      | 737/2000 [17:41<32:00,  1.52s/it]

Training_loss 16.25489


 37%|███▋      | 738/2000 [17:42<30:54,  1.47s/it]

Training_loss 16.24501


 37%|███▋      | 739/2000 [17:44<30:11,  1.44s/it]

Training_loss 16.26487


 37%|███▋      | 740/2000 [17:45<29:32,  1.41s/it]

Training_loss 16.25173


 37%|███▋      | 741/2000 [17:46<29:07,  1.39s/it]

Training_loss 16.25778


 37%|███▋      | 742/2000 [17:48<29:07,  1.39s/it]

Training_loss 16.24270


 37%|███▋      | 743/2000 [17:50<34:46,  1.66s/it]

Training_loss 16.26472


 37%|███▋      | 744/2000 [17:51<33:54,  1.62s/it]

Training_loss 16.25201


 37%|███▋      | 745/2000 [17:53<31:42,  1.52s/it]

Training_loss 16.29217


 37%|███▋      | 746/2000 [17:54<31:49,  1.52s/it]

Training_loss 16.30230


 37%|███▋      | 747/2000 [17:56<31:25,  1.50s/it]

Training_loss 16.30213


 37%|███▋      | 748/2000 [17:58<34:30,  1.65s/it]

Training_loss 16.29236


 37%|███▋      | 749/2000 [17:59<32:20,  1.55s/it]

Training_loss 16.30217


 38%|███▊      | 750/2000 [18:00<29:29,  1.42s/it]

Training_loss 16.31260


 38%|███▊      | 751/2000 [18:02<30:27,  1.46s/it]

Training_loss 16.29172


 38%|███▊      | 752/2000 [18:04<34:05,  1.64s/it]

Training_loss 16.26785


 38%|███▊      | 753/2000 [18:06<39:16,  1.89s/it]

Training_loss 16.26668


 38%|███▊      | 754/2000 [18:08<39:26,  1.90s/it]

Training_loss 16.26939


 38%|███▊      | 755/2000 [18:10<41:58,  2.02s/it]

Training_loss 16.23778


 38%|███▊      | 756/2000 [18:12<40:10,  1.94s/it]

Training_loss 16.25738


 38%|███▊      | 757/2000 [18:14<42:33,  2.05s/it]

Training_loss 16.28339


 38%|███▊      | 758/2000 [18:17<43:32,  2.10s/it]

Training_loss 16.25989


 38%|███▊      | 759/2000 [18:18<41:00,  1.98s/it]

Training_loss 16.24674


 38%|███▊      | 760/2000 [18:20<38:42,  1.87s/it]

Training_loss 16.25100


 38%|███▊      | 761/2000 [18:22<37:13,  1.80s/it]

Training_loss 16.25788


 38%|███▊      | 762/2000 [18:23<35:33,  1.72s/it]

Training_loss 16.26993


 38%|███▊      | 763/2000 [18:25<34:27,  1.67s/it]

Training_loss 16.24917


 38%|███▊      | 764/2000 [18:27<39:49,  1.93s/it]

Training_loss 16.23831


 38%|███▊      | 765/2000 [18:30<41:34,  2.02s/it]

Training_loss 16.23496


 38%|███▊      | 766/2000 [18:31<39:19,  1.91s/it]

Training_loss 16.21949


 38%|███▊      | 767/2000 [18:34<43:59,  2.14s/it]

Training_loss 16.24927


 38%|███▊      | 768/2000 [18:35<40:35,  1.98s/it]

Training_loss 16.25522


 38%|███▊      | 769/2000 [18:37<38:40,  1.89s/it]

Training_loss 16.22749


 38%|███▊      | 770/2000 [18:39<37:16,  1.82s/it]

Training_loss 16.24634


 39%|███▊      | 771/2000 [18:40<35:59,  1.76s/it]

Training_loss 16.21983


 39%|███▊      | 772/2000 [18:42<35:59,  1.76s/it]

Training_loss 16.22892


 39%|███▊      | 773/2000 [18:44<35:25,  1.73s/it]

Training_loss 16.22699


 39%|███▊      | 774/2000 [18:46<40:31,  1.98s/it]

Training_loss 16.22575


 39%|███▉      | 775/2000 [18:50<48:12,  2.36s/it]

Training_loss 16.23238


 39%|███▉      | 776/2000 [18:52<47:23,  2.32s/it]

Training_loss 16.24072


 39%|███▉      | 777/2000 [18:53<42:21,  2.08s/it]

Training_loss 16.24326


 39%|███▉      | 778/2000 [18:55<39:15,  1.93s/it]

Training_loss 16.25429


 39%|███▉      | 779/2000 [18:57<37:38,  1.85s/it]

Training_loss 16.28158


 39%|███▉      | 780/2000 [18:59<38:32,  1.90s/it]

Training_loss 16.29584


 39%|███▉      | 781/2000 [19:00<37:40,  1.85s/it]

Training_loss 16.29180


 39%|███▉      | 782/2000 [19:02<36:04,  1.78s/it]

Training_loss 16.30303


 39%|███▉      | 783/2000 [19:05<41:31,  2.05s/it]

Training_loss 16.30643


 39%|███▉      | 784/2000 [19:08<47:29,  2.34s/it]

Training_loss 16.32756


 39%|███▉      | 785/2000 [19:10<44:17,  2.19s/it]

Training_loss 16.37545


 39%|███▉      | 786/2000 [19:11<39:49,  1.97s/it]

Training_loss 16.37751


 39%|███▉      | 787/2000 [19:13<37:08,  1.84s/it]

Training_loss 16.35629


 39%|███▉      | 788/2000 [19:15<41:37,  2.06s/it]

Training_loss 16.37301


 39%|███▉      | 789/2000 [19:18<45:19,  2.25s/it]

Training_loss 16.38630


 40%|███▉      | 790/2000 [19:20<43:43,  2.17s/it]

Training_loss 16.37992


 40%|███▉      | 791/2000 [19:22<44:29,  2.21s/it]

Training_loss 16.39457


 40%|███▉      | 792/2000 [19:24<42:40,  2.12s/it]

Training_loss 16.39948


 40%|███▉      | 793/2000 [19:26<40:20,  2.01s/it]

Training_loss 16.42343


 40%|███▉      | 794/2000 [19:27<38:25,  1.91s/it]

Training_loss 16.43916


 40%|███▉      | 795/2000 [19:29<37:42,  1.88s/it]

Training_loss 16.43891


 40%|███▉      | 796/2000 [19:31<37:40,  1.88s/it]

Training_loss 16.43115


 40%|███▉      | 797/2000 [19:33<37:33,  1.87s/it]

Training_loss 16.44189


 40%|███▉      | 798/2000 [19:34<35:37,  1.78s/it]

Training_loss 16.44238


 40%|███▉      | 799/2000 [19:36<33:52,  1.69s/it]

Training_loss 16.44823


 40%|████      | 800/2000 [19:38<32:49,  1.64s/it]

Training_loss 16.48543


 40%|████      | 801/2000 [19:39<32:45,  1.64s/it]

Training_loss 16.47656


 40%|████      | 802/2000 [19:42<38:02,  1.90s/it]

Training_loss 16.51426


 40%|████      | 803/2000 [19:44<43:17,  2.17s/it]

Training_loss 16.52092


 40%|████      | 804/2000 [19:47<44:10,  2.22s/it]

Training_loss 16.52526


 40%|████      | 805/2000 [19:49<45:24,  2.28s/it]

Training_loss 16.55291


 40%|████      | 806/2000 [19:51<41:50,  2.10s/it]

Training_loss 16.56371


 40%|████      | 807/2000 [19:52<37:26,  1.88s/it]

Training_loss 16.57157


 40%|████      | 808/2000 [19:54<34:37,  1.74s/it]

Training_loss 16.55724


 40%|████      | 809/2000 [19:55<33:44,  1.70s/it]

Training_loss 16.55824


 40%|████      | 810/2000 [19:57<32:10,  1.62s/it]

Training_loss 16.56555


 41%|████      | 811/2000 [19:58<31:04,  1.57s/it]

Training_loss 16.56348


 41%|████      | 812/2000 [20:01<36:24,  1.84s/it]

Training_loss 16.53317


 41%|████      | 813/2000 [20:03<38:47,  1.96s/it]

Training_loss 16.51660


 41%|████      | 814/2000 [20:05<37:08,  1.88s/it]

Training_loss 16.51324


 41%|████      | 815/2000 [20:06<34:47,  1.76s/it]

Training_loss 16.52186


 41%|████      | 816/2000 [20:08<34:09,  1.73s/it]

Training_loss 16.51656


 41%|████      | 817/2000 [20:09<32:45,  1.66s/it]

Training_loss 16.51479


 41%|████      | 818/2000 [20:11<31:48,  1.61s/it]

Training_loss 16.52919


 41%|████      | 819/2000 [20:12<31:19,  1.59s/it]

Training_loss 16.54902


 41%|████      | 820/2000 [20:14<31:11,  1.59s/it]

Training_loss 16.54814


 41%|████      | 821/2000 [20:15<30:33,  1.56s/it]

Training_loss 16.53438


 41%|████      | 822/2000 [20:17<30:31,  1.56s/it]

Training_loss 16.54866


 41%|████      | 823/2000 [20:19<32:30,  1.66s/it]

Training_loss 16.55374


 41%|████      | 824/2000 [20:21<33:14,  1.70s/it]

Training_loss 16.54555


 41%|████▏     | 825/2000 [20:22<32:05,  1.64s/it]

Training_loss 16.56977


 41%|████▏     | 826/2000 [20:24<30:57,  1.58s/it]

Training_loss 16.58755


 41%|████▏     | 827/2000 [20:25<30:53,  1.58s/it]

Training_loss 16.59747


 41%|████▏     | 828/2000 [20:27<30:27,  1.56s/it]

Training_loss 16.61356


 41%|████▏     | 829/2000 [20:28<29:36,  1.52s/it]

Training_loss 16.61428


 42%|████▏     | 830/2000 [20:31<35:34,  1.82s/it]

Training_loss 16.64032


 42%|████▏     | 831/2000 [20:32<35:15,  1.81s/it]

Training_loss 16.64163


 42%|████▏     | 832/2000 [20:34<33:43,  1.73s/it]

Training_loss 16.65644


 42%|████▏     | 833/2000 [20:36<33:04,  1.70s/it]

Training_loss 16.65995


 42%|████▏     | 834/2000 [20:37<30:52,  1.59s/it]

Training_loss 16.69534


 42%|████▏     | 835/2000 [20:38<29:04,  1.50s/it]

Training_loss 16.66746


 42%|████▏     | 836/2000 [20:40<28:46,  1.48s/it]

Training_loss 16.66482


 42%|████▏     | 837/2000 [20:41<28:42,  1.48s/it]

Training_loss 16.65738


 42%|████▏     | 838/2000 [20:42<28:19,  1.46s/it]

Training_loss 16.65664


 42%|████▏     | 839/2000 [20:44<28:24,  1.47s/it]

Training_loss 16.65652


 42%|████▏     | 840/2000 [20:45<28:11,  1.46s/it]

Training_loss 16.66513


 42%|████▏     | 841/2000 [20:47<27:17,  1.41s/it]

Training_loss 16.67766


 42%|████▏     | 842/2000 [20:48<26:59,  1.40s/it]

Training_loss 16.68896


 42%|████▏     | 843/2000 [20:49<27:13,  1.41s/it]

Training_loss 16.66481


 42%|████▏     | 844/2000 [20:51<29:07,  1.51s/it]

Training_loss 16.67817


 42%|████▏     | 845/2000 [20:53<28:38,  1.49s/it]

Training_loss 16.66508


 42%|████▏     | 846/2000 [20:55<31:22,  1.63s/it]

Training_loss 16.65268


 42%|████▏     | 847/2000 [20:56<30:46,  1.60s/it]

Training_loss 16.66182


 42%|████▏     | 848/2000 [20:58<31:44,  1.65s/it]

Training_loss 16.65724


 42%|████▏     | 849/2000 [21:00<34:47,  1.81s/it]

Training_loss 16.66131


 42%|████▎     | 850/2000 [21:02<32:25,  1.69s/it]

Training_loss 16.64813


 43%|████▎     | 851/2000 [21:03<30:54,  1.61s/it]

Training_loss 16.62672


 43%|████▎     | 852/2000 [21:05<30:29,  1.59s/it]

Training_loss 16.61775


 43%|████▎     | 853/2000 [21:06<31:59,  1.67s/it]

Training_loss 16.63093


 43%|████▎     | 854/2000 [21:08<32:24,  1.70s/it]

Training_loss 16.62472


 43%|████▎     | 855/2000 [21:10<33:56,  1.78s/it]

Training_loss 16.64892


 43%|████▎     | 856/2000 [21:12<35:53,  1.88s/it]

Training_loss 16.61648


 43%|████▎     | 857/2000 [21:14<33:55,  1.78s/it]

Training_loss 16.66496


 43%|████▎     | 858/2000 [21:15<33:24,  1.76s/it]

Training_loss 16.64259


 43%|████▎     | 859/2000 [21:18<35:51,  1.89s/it]

Training_loss 16.61503


 43%|████▎     | 860/2000 [21:19<34:50,  1.83s/it]

Training_loss 16.63274


 43%|████▎     | 861/2000 [21:22<37:07,  1.96s/it]

Training_loss 16.62750


 43%|████▎     | 862/2000 [21:24<38:24,  2.03s/it]

Training_loss 16.59532


 43%|████▎     | 863/2000 [21:25<36:03,  1.90s/it]

Training_loss 16.59931


 43%|████▎     | 864/2000 [21:27<36:22,  1.92s/it]

Training_loss 16.57545


 43%|████▎     | 865/2000 [21:29<35:05,  1.85s/it]

Training_loss 16.57958


 43%|████▎     | 866/2000 [21:31<35:58,  1.90s/it]

Training_loss 16.59253


 43%|████▎     | 867/2000 [21:33<35:01,  1.85s/it]

Training_loss 16.61521


 43%|████▎     | 868/2000 [21:34<33:38,  1.78s/it]

Training_loss 16.63618


 43%|████▎     | 869/2000 [21:36<32:33,  1.73s/it]

Training_loss 16.62313


 44%|████▎     | 870/2000 [21:38<32:41,  1.74s/it]

Training_loss 16.63497


 44%|████▎     | 871/2000 [21:39<32:20,  1.72s/it]

Training_loss 16.65565


 44%|████▎     | 872/2000 [21:42<34:28,  1.83s/it]

Training_loss 16.66832


 44%|████▎     | 873/2000 [21:44<39:13,  2.09s/it]

Training_loss 16.67833


 44%|████▎     | 874/2000 [21:47<41:22,  2.20s/it]

Training_loss 16.66410


 44%|████▍     | 875/2000 [21:48<37:30,  2.00s/it]

Training_loss 16.66860


 44%|████▍     | 876/2000 [21:50<35:56,  1.92s/it]

Training_loss 16.68357


 44%|████▍     | 877/2000 [21:52<35:02,  1.87s/it]

Training_loss 16.67571


 44%|████▍     | 878/2000 [21:54<38:07,  2.04s/it]

Training_loss 16.69430


 44%|████▍     | 879/2000 [21:57<42:26,  2.27s/it]

Training_loss 16.70748


 44%|████▍     | 880/2000 [21:59<40:59,  2.20s/it]

Training_loss 16.68450


 44%|████▍     | 881/2000 [22:01<37:54,  2.03s/it]

Training_loss 16.70611


 44%|████▍     | 882/2000 [22:02<34:02,  1.83s/it]

Training_loss 16.73323


 44%|████▍     | 883/2000 [22:04<33:00,  1.77s/it]

Training_loss 16.74216


 44%|████▍     | 884/2000 [22:05<30:59,  1.67s/it]

Training_loss 16.74128


 44%|████▍     | 885/2000 [22:06<29:11,  1.57s/it]

Training_loss 16.74553


 44%|████▍     | 886/2000 [22:08<28:15,  1.52s/it]

Training_loss 16.76917


 44%|████▍     | 887/2000 [22:09<28:29,  1.54s/it]

Training_loss 16.77306


 44%|████▍     | 888/2000 [22:11<29:12,  1.58s/it]

Training_loss 16.78458


 44%|████▍     | 889/2000 [22:12<28:11,  1.52s/it]

Training_loss 16.81931


 44%|████▍     | 890/2000 [22:14<27:46,  1.50s/it]

Training_loss 16.84100


 45%|████▍     | 891/2000 [22:15<27:03,  1.46s/it]

Training_loss 16.83776


 45%|████▍     | 892/2000 [22:17<26:36,  1.44s/it]

Training_loss 16.85401


 45%|████▍     | 893/2000 [22:18<26:15,  1.42s/it]

Training_loss 16.84519


 45%|████▍     | 894/2000 [22:19<25:54,  1.41s/it]

Training_loss 16.83414


 45%|████▍     | 895/2000 [22:21<25:48,  1.40s/it]

Training_loss 16.86358


 45%|████▍     | 896/2000 [22:22<25:48,  1.40s/it]

Training_loss 16.81897


 45%|████▍     | 897/2000 [22:24<25:07,  1.37s/it]

Training_loss 16.81378


 45%|████▍     | 898/2000 [22:25<24:57,  1.36s/it]

Training_loss 16.82540


 45%|████▍     | 899/2000 [22:26<26:26,  1.44s/it]

Training_loss 16.84531


 45%|████▌     | 900/2000 [22:28<25:40,  1.40s/it]

Training_loss 16.81970


 45%|████▌     | 901/2000 [22:29<25:21,  1.38s/it]

Training_loss 16.80567


 45%|████▌     | 902/2000 [22:30<24:37,  1.35s/it]

Training_loss 16.79567


 45%|████▌     | 903/2000 [22:32<25:09,  1.38s/it]

Training_loss 16.79018


 45%|████▌     | 904/2000 [22:33<26:09,  1.43s/it]

Training_loss 16.75631


 45%|████▌     | 905/2000 [22:35<26:28,  1.45s/it]

Training_loss 16.76476


 45%|████▌     | 906/2000 [22:36<25:36,  1.40s/it]

Training_loss 16.78916


 45%|████▌     | 907/2000 [22:38<25:18,  1.39s/it]

Training_loss 16.80767


 45%|████▌     | 908/2000 [22:39<24:53,  1.37s/it]

Training_loss 16.81617


 45%|████▌     | 909/2000 [22:40<24:36,  1.35s/it]

Training_loss 16.83786


 46%|████▌     | 910/2000 [22:42<25:20,  1.39s/it]

Training_loss 16.81749


 46%|████▌     | 911/2000 [22:43<26:27,  1.46s/it]

Training_loss 16.80756


 46%|████▌     | 912/2000 [22:45<26:17,  1.45s/it]

Training_loss 16.82358


 46%|████▌     | 913/2000 [22:46<25:21,  1.40s/it]

Training_loss 16.83300


 46%|████▌     | 914/2000 [22:47<24:29,  1.35s/it]

Training_loss 16.85420


 46%|████▌     | 915/2000 [22:48<23:53,  1.32s/it]

Training_loss 16.83165


 46%|████▌     | 916/2000 [22:50<24:06,  1.33s/it]

Training_loss 16.80891


 46%|████▌     | 917/2000 [22:51<24:55,  1.38s/it]

Training_loss 16.79196


 46%|████▌     | 918/2000 [22:53<25:25,  1.41s/it]

Training_loss 16.77973


 46%|████▌     | 919/2000 [22:54<24:58,  1.39s/it]

Training_loss 16.78670


 46%|████▌     | 920/2000 [22:55<24:23,  1.35s/it]

Training_loss 16.79696


 46%|████▌     | 921/2000 [22:57<23:51,  1.33s/it]

Training_loss 16.81360


 46%|████▌     | 922/2000 [22:58<26:10,  1.46s/it]

Training_loss 16.83039


 46%|████▌     | 923/2000 [23:00<26:19,  1.47s/it]

Training_loss 16.83454


 46%|████▌     | 924/2000 [23:01<25:07,  1.40s/it]

Training_loss 16.82848


 46%|████▋     | 925/2000 [23:03<24:41,  1.38s/it]

Training_loss 16.83784


 46%|████▋     | 926/2000 [23:04<24:30,  1.37s/it]

Training_loss 16.82333


 46%|████▋     | 927/2000 [23:05<24:02,  1.34s/it]

Training_loss 16.83771


 46%|████▋     | 928/2000 [23:07<25:18,  1.42s/it]

Training_loss 16.86501


 46%|████▋     | 929/2000 [23:08<24:49,  1.39s/it]

Training_loss 16.81165


 46%|████▋     | 930/2000 [23:09<24:40,  1.38s/it]

Training_loss 16.81836


 47%|████▋     | 931/2000 [23:11<24:52,  1.40s/it]

Training_loss 16.82638


 47%|████▋     | 932/2000 [23:12<24:28,  1.38s/it]

Training_loss 16.82745


 47%|████▋     | 933/2000 [23:14<25:17,  1.42s/it]

Training_loss 16.83495


 47%|████▋     | 934/2000 [23:15<25:44,  1.45s/it]

Training_loss 16.80130


 47%|████▋     | 935/2000 [23:17<25:27,  1.43s/it]

Training_loss 16.80595


 47%|████▋     | 936/2000 [23:18<24:57,  1.41s/it]

Training_loss 16.79513


 47%|████▋     | 937/2000 [23:19<24:21,  1.37s/it]

Training_loss 16.78738


 47%|████▋     | 938/2000 [23:21<24:05,  1.36s/it]

Training_loss 16.78605


 47%|████▋     | 939/2000 [23:22<24:13,  1.37s/it]

Training_loss 16.75925


 47%|████▋     | 940/2000 [23:23<23:53,  1.35s/it]

Training_loss 16.76316


 47%|████▋     | 941/2000 [23:25<23:24,  1.33s/it]

Training_loss 16.74268


 47%|████▋     | 942/2000 [23:26<23:28,  1.33s/it]

Training_loss 16.74543


 47%|████▋     | 943/2000 [23:27<23:19,  1.32s/it]

Training_loss 16.75482


 47%|████▋     | 944/2000 [23:29<23:09,  1.32s/it]

Training_loss 16.72840


 47%|████▋     | 945/2000 [23:30<24:15,  1.38s/it]

Training_loss 16.71883


 47%|████▋     | 946/2000 [23:31<24:29,  1.39s/it]

Training_loss 16.72972


 47%|████▋     | 947/2000 [23:33<24:06,  1.37s/it]

Training_loss 16.70749


 47%|████▋     | 948/2000 [23:34<24:12,  1.38s/it]

Training_loss 16.70466


 47%|████▋     | 949/2000 [23:36<23:51,  1.36s/it]

Training_loss 16.71795


 48%|████▊     | 950/2000 [23:37<23:37,  1.35s/it]

Training_loss 16.74288


 48%|████▊     | 951/2000 [23:38<23:50,  1.36s/it]

Training_loss 16.75725


 48%|████▊     | 952/2000 [23:40<23:47,  1.36s/it]

Training_loss 16.74773


 48%|████▊     | 953/2000 [23:41<23:42,  1.36s/it]

Training_loss 16.75243


 48%|████▊     | 954/2000 [23:42<23:36,  1.35s/it]

Training_loss 16.75030


 48%|████▊     | 955/2000 [23:44<23:18,  1.34s/it]

Training_loss 16.78248


 48%|████▊     | 956/2000 [23:45<23:21,  1.34s/it]

Training_loss 16.79211


 48%|████▊     | 957/2000 [23:47<24:53,  1.43s/it]

Training_loss 16.77834


 48%|████▊     | 958/2000 [23:48<24:26,  1.41s/it]

Training_loss 16.79928


 48%|████▊     | 959/2000 [23:49<23:48,  1.37s/it]

Training_loss 16.81562


 48%|████▊     | 960/2000 [23:51<23:34,  1.36s/it]

Training_loss 16.83377


 48%|████▊     | 961/2000 [23:52<22:54,  1.32s/it]

Training_loss 16.82673


 48%|████▊     | 962/2000 [23:53<23:19,  1.35s/it]

Training_loss 16.84575


 48%|████▊     | 963/2000 [23:55<23:26,  1.36s/it]

Training_loss 16.82545


 48%|████▊     | 964/2000 [23:56<22:44,  1.32s/it]

Training_loss 16.80702


 48%|████▊     | 965/2000 [23:57<22:27,  1.30s/it]

Training_loss 16.79269


 48%|████▊     | 966/2000 [23:58<22:43,  1.32s/it]

Training_loss 16.78153


 48%|████▊     | 967/2000 [24:00<23:55,  1.39s/it]

Training_loss 16.78818


 48%|████▊     | 968/2000 [24:02<24:38,  1.43s/it]

Training_loss 16.81411


 48%|████▊     | 969/2000 [24:03<25:20,  1.47s/it]

Training_loss 16.81570


 48%|████▊     | 970/2000 [24:04<24:35,  1.43s/it]

Training_loss 16.83238


 49%|████▊     | 971/2000 [24:06<23:52,  1.39s/it]

Training_loss 16.84254


 49%|████▊     | 972/2000 [24:07<23:43,  1.39s/it]

Training_loss 16.85870


 49%|████▊     | 973/2000 [24:08<23:02,  1.35s/it]

Training_loss 16.81217


 49%|████▊     | 974/2000 [24:10<23:10,  1.36s/it]

Training_loss 16.82424


 49%|████▉     | 975/2000 [24:11<23:51,  1.40s/it]

Training_loss 16.84576


 49%|████▉     | 976/2000 [24:13<23:45,  1.39s/it]

Training_loss 16.85084


 49%|████▉     | 977/2000 [24:14<23:25,  1.37s/it]

Training_loss 16.83669


 49%|████▉     | 978/2000 [24:15<22:56,  1.35s/it]

Training_loss 16.83229


 49%|████▉     | 979/2000 [24:17<22:44,  1.34s/it]

Training_loss 16.84937


 49%|████▉     | 980/2000 [24:18<24:14,  1.43s/it]

Training_loss 16.89365


 49%|████▉     | 981/2000 [24:20<24:29,  1.44s/it]

Training_loss 16.89265


 49%|████▉     | 982/2000 [24:21<24:15,  1.43s/it]

Training_loss 16.90680


 49%|████▉     | 983/2000 [24:23<24:32,  1.45s/it]

Training_loss 16.89476


 49%|████▉     | 984/2000 [24:24<23:41,  1.40s/it]

Training_loss 16.91911


 49%|████▉     | 985/2000 [24:25<23:20,  1.38s/it]

Training_loss 16.92605


 49%|████▉     | 986/2000 [24:26<22:57,  1.36s/it]

Training_loss 16.91451


 49%|████▉     | 987/2000 [24:28<22:42,  1.35s/it]

Training_loss 16.87259


 49%|████▉     | 988/2000 [24:29<22:29,  1.33s/it]

Training_loss 16.87224


 49%|████▉     | 989/2000 [24:30<22:08,  1.31s/it]

Training_loss 16.86870


 50%|████▉     | 990/2000 [24:32<22:41,  1.35s/it]

Training_loss 16.84567


 50%|████▉     | 991/2000 [24:33<22:55,  1.36s/it]

Training_loss 16.85397


 50%|████▉     | 992/2000 [24:35<24:19,  1.45s/it]

Training_loss 16.82732


 50%|████▉     | 993/2000 [24:36<24:36,  1.47s/it]

Training_loss 16.82298


 50%|████▉     | 994/2000 [24:38<24:05,  1.44s/it]

Training_loss 16.83081


 50%|████▉     | 995/2000 [24:39<23:35,  1.41s/it]

Training_loss 16.82492


 50%|████▉     | 996/2000 [24:40<23:25,  1.40s/it]

Training_loss 16.83113


 50%|████▉     | 997/2000 [24:42<23:42,  1.42s/it]

Training_loss 16.85066


 50%|████▉     | 998/2000 [24:43<23:37,  1.41s/it]

Training_loss 16.83764


 50%|████▉     | 999/2000 [24:45<23:02,  1.38s/it]

Training_loss 16.80030


 50%|█████     | 1000/2000 [24:46<23:03,  1.38s/it]

Training_loss 16.83670


 50%|█████     | 1001/2000 [24:47<22:30,  1.35s/it]

Training_loss 16.85308


 50%|█████     | 1002/2000 [24:49<22:23,  1.35s/it]

Training_loss 16.86293


 50%|█████     | 1003/2000 [24:50<23:14,  1.40s/it]

Training_loss 16.87630


 50%|█████     | 1004/2000 [24:52<23:29,  1.42s/it]

Training_loss 16.84697


 50%|█████     | 1005/2000 [24:53<23:06,  1.39s/it]

Training_loss 16.87029


 50%|█████     | 1006/2000 [24:54<22:54,  1.38s/it]

Training_loss 16.86363


 50%|█████     | 1007/2000 [24:56<22:33,  1.36s/it]

Training_loss 16.89400


 50%|█████     | 1008/2000 [24:57<23:01,  1.39s/it]

Training_loss 16.89677


 50%|█████     | 1009/2000 [24:59<24:09,  1.46s/it]

Training_loss 16.86546


 50%|█████     | 1010/2000 [25:00<25:31,  1.55s/it]

Training_loss 16.84925


 51%|█████     | 1011/2000 [25:02<25:45,  1.56s/it]

Training_loss 16.84724


 51%|█████     | 1012/2000 [25:04<25:53,  1.57s/it]

Training_loss 16.85810


 51%|█████     | 1013/2000 [25:05<26:31,  1.61s/it]

Training_loss 16.85235


 51%|█████     | 1014/2000 [25:07<28:08,  1.71s/it]

Training_loss 16.89080


 51%|█████     | 1015/2000 [25:09<28:00,  1.71s/it]

Training_loss 16.89761


 51%|█████     | 1016/2000 [25:11<27:22,  1.67s/it]

Training_loss 16.89128


 51%|█████     | 1017/2000 [25:12<27:36,  1.69s/it]

Training_loss 16.88360


 51%|█████     | 1018/2000 [25:14<27:49,  1.70s/it]

Training_loss 16.85699


 51%|█████     | 1019/2000 [25:16<27:24,  1.68s/it]

Training_loss 16.85941


 51%|█████     | 1020/2000 [25:17<27:06,  1.66s/it]

Training_loss 16.86556


 51%|█████     | 1021/2000 [25:19<26:04,  1.60s/it]

Training_loss 16.89480


 51%|█████     | 1022/2000 [25:20<26:04,  1.60s/it]

Training_loss 16.90656


 51%|█████     | 1023/2000 [25:22<27:02,  1.66s/it]

Training_loss 16.91220


 51%|█████     | 1024/2000 [25:24<28:02,  1.72s/it]

Training_loss 16.90468


 51%|█████▏    | 1025/2000 [25:26<27:26,  1.69s/it]

Training_loss 16.91385


 51%|█████▏    | 1026/2000 [25:27<26:47,  1.65s/it]

Training_loss 16.92148


 51%|█████▏    | 1027/2000 [25:30<32:48,  2.02s/it]

Training_loss 16.93097


 51%|█████▏    | 1028/2000 [25:33<35:10,  2.17s/it]

Training_loss 16.99166


 51%|█████▏    | 1029/2000 [25:35<35:17,  2.18s/it]

Training_loss 16.99373


 52%|█████▏    | 1030/2000 [25:37<34:29,  2.13s/it]

Training_loss 17.00854


 52%|█████▏    | 1031/2000 [25:39<36:30,  2.26s/it]

Training_loss 17.01877


 52%|█████▏    | 1032/2000 [25:42<36:17,  2.25s/it]

Training_loss 17.01964


 52%|█████▏    | 1033/2000 [25:44<35:44,  2.22s/it]

Training_loss 17.03292


 52%|█████▏    | 1034/2000 [25:45<32:47,  2.04s/it]

Training_loss 17.01159


 52%|█████▏    | 1035/2000 [25:47<29:32,  1.84s/it]

Training_loss 17.02554


 52%|█████▏    | 1036/2000 [25:48<27:42,  1.72s/it]

Training_loss 17.04331


 52%|█████▏    | 1037/2000 [25:51<33:43,  2.10s/it]

Training_loss 17.02675


 52%|█████▏    | 1038/2000 [25:54<38:41,  2.41s/it]

Training_loss 17.00926


 52%|█████▏    | 1039/2000 [25:58<43:32,  2.72s/it]

Training_loss 17.01220


 52%|█████▏    | 1040/2000 [26:01<46:20,  2.90s/it]

Training_loss 17.02359


 52%|█████▏    | 1041/2000 [26:04<47:31,  2.97s/it]

Training_loss 17.01992


 52%|█████▏    | 1042/2000 [26:06<42:58,  2.69s/it]

Training_loss 17.01391


 52%|█████▏    | 1043/2000 [26:09<41:20,  2.59s/it]

Training_loss 16.97089


 52%|█████▏    | 1044/2000 [26:10<36:09,  2.27s/it]

Training_loss 16.97834


 52%|█████▏    | 1045/2000 [26:11<31:39,  1.99s/it]

Training_loss 16.98159


 52%|█████▏    | 1046/2000 [26:13<29:12,  1.84s/it]

Training_loss 16.98918


 52%|█████▏    | 1047/2000 [26:15<32:25,  2.04s/it]

Training_loss 16.97114


 52%|█████▏    | 1048/2000 [26:18<34:20,  2.16s/it]

Training_loss 16.98438


 52%|█████▏    | 1049/2000 [26:19<30:48,  1.94s/it]

Training_loss 16.96922


 52%|█████▎    | 1050/2000 [26:21<27:56,  1.76s/it]

Training_loss 16.96341


 53%|█████▎    | 1051/2000 [26:22<28:06,  1.78s/it]

Training_loss 16.98805


 53%|█████▎    | 1052/2000 [26:24<29:22,  1.86s/it]

Training_loss 16.96860


 53%|█████▎    | 1053/2000 [26:27<32:45,  2.08s/it]

Training_loss 16.96829


 53%|█████▎    | 1054/2000 [26:28<29:25,  1.87s/it]

Training_loss 17.01058


 53%|█████▎    | 1055/2000 [26:30<26:14,  1.67s/it]

Training_loss 17.01852


 53%|█████▎    | 1056/2000 [26:31<23:41,  1.51s/it]

Training_loss 17.02954


 53%|█████▎    | 1057/2000 [26:32<23:01,  1.47s/it]

Training_loss 17.03579


 53%|█████▎    | 1058/2000 [26:33<22:16,  1.42s/it]

Training_loss 17.07897


 53%|█████▎    | 1059/2000 [26:35<23:54,  1.52s/it]

Training_loss 17.10350


 53%|█████▎    | 1060/2000 [26:38<31:08,  1.99s/it]

Training_loss 17.13967


 53%|█████▎    | 1061/2000 [26:41<35:51,  2.29s/it]

Training_loss 17.13559


 53%|█████▎    | 1062/2000 [26:44<37:50,  2.42s/it]

Training_loss 17.12412


 53%|█████▎    | 1063/2000 [26:47<38:14,  2.45s/it]

Training_loss 17.12115


 53%|█████▎    | 1064/2000 [26:50<40:51,  2.62s/it]

Training_loss 17.13192


 53%|█████▎    | 1065/2000 [26:52<38:19,  2.46s/it]

Training_loss 17.12136


 53%|█████▎    | 1066/2000 [26:53<34:03,  2.19s/it]

Training_loss 17.15960


 53%|█████▎    | 1067/2000 [26:55<30:58,  1.99s/it]

Training_loss 17.20447


 53%|█████▎    | 1068/2000 [26:56<27:18,  1.76s/it]

Training_loss 17.21548


 53%|█████▎    | 1069/2000 [26:57<26:05,  1.68s/it]

Training_loss 17.17825


 54%|█████▎    | 1070/2000 [26:59<25:59,  1.68s/it]

Training_loss 17.16878


 54%|█████▎    | 1071/2000 [27:00<23:46,  1.53s/it]

Training_loss 17.15083


 54%|█████▎    | 1072/2000 [27:02<22:31,  1.46s/it]

Training_loss 17.15537


 54%|█████▎    | 1073/2000 [27:03<21:31,  1.39s/it]

Training_loss 17.14696


 54%|█████▎    | 1074/2000 [27:04<20:36,  1.34s/it]

Training_loss 17.11732


 54%|█████▍    | 1075/2000 [27:05<19:53,  1.29s/it]

Training_loss 17.15620


 54%|█████▍    | 1076/2000 [27:06<19:39,  1.28s/it]

Training_loss 17.16126


 54%|█████▍    | 1077/2000 [27:08<18:48,  1.22s/it]

Training_loss 17.15758


 54%|█████▍    | 1078/2000 [27:09<18:09,  1.18s/it]

Training_loss 17.19997


 54%|█████▍    | 1079/2000 [27:10<18:07,  1.18s/it]

Training_loss 17.23380


 54%|█████▍    | 1080/2000 [27:11<18:08,  1.18s/it]

Training_loss 17.22284


 54%|█████▍    | 1081/2000 [27:12<17:57,  1.17s/it]

Training_loss 17.25787


 54%|█████▍    | 1082/2000 [27:14<19:20,  1.26s/it]

Training_loss 17.28027


 54%|█████▍    | 1083/2000 [27:15<19:51,  1.30s/it]

Training_loss 17.28279


 54%|█████▍    | 1084/2000 [27:16<19:08,  1.25s/it]

Training_loss 17.30865


 54%|█████▍    | 1085/2000 [27:17<18:50,  1.24s/it]

Training_loss 17.28244


 54%|█████▍    | 1086/2000 [27:18<18:15,  1.20s/it]

Training_loss 17.25270


 54%|█████▍    | 1087/2000 [27:20<18:14,  1.20s/it]

Training_loss 17.27320


 54%|█████▍    | 1088/2000 [27:21<18:06,  1.19s/it]

Training_loss 17.27869


 54%|█████▍    | 1089/2000 [27:22<18:08,  1.19s/it]

Training_loss 17.29271


 55%|█████▍    | 1090/2000 [27:23<18:01,  1.19s/it]

Training_loss 17.29944


 55%|█████▍    | 1091/2000 [27:24<17:54,  1.18s/it]

Training_loss 17.29982


 55%|█████▍    | 1092/2000 [27:26<17:42,  1.17s/it]

Training_loss 17.30453


 55%|█████▍    | 1093/2000 [27:27<17:29,  1.16s/it]

Training_loss 17.30229


 55%|█████▍    | 1094/2000 [27:28<17:22,  1.15s/it]

Training_loss 17.29552


 55%|█████▍    | 1095/2000 [27:29<17:24,  1.15s/it]

Training_loss 17.30796


 55%|█████▍    | 1096/2000 [27:30<19:04,  1.27s/it]

Training_loss 17.31412


 55%|█████▍    | 1097/2000 [27:32<19:06,  1.27s/it]

Training_loss 17.29905


 55%|█████▍    | 1098/2000 [27:33<18:40,  1.24s/it]

Training_loss 17.34453


 55%|█████▍    | 1099/2000 [27:34<18:23,  1.22s/it]

Training_loss 17.34671


 55%|█████▌    | 1100/2000 [27:35<18:01,  1.20s/it]

Training_loss 17.33455


 55%|█████▌    | 1101/2000 [27:36<18:03,  1.21s/it]

Training_loss 17.32758


 55%|█████▌    | 1102/2000 [27:38<17:41,  1.18s/it]

Training_loss 17.34368


 55%|█████▌    | 1103/2000 [27:39<17:30,  1.17s/it]

Training_loss 17.36137


 55%|█████▌    | 1104/2000 [27:40<17:29,  1.17s/it]

Training_loss 17.35989


 55%|█████▌    | 1105/2000 [27:41<17:30,  1.17s/it]

Training_loss 17.35870


 55%|█████▌    | 1106/2000 [27:42<17:04,  1.15s/it]

Training_loss 17.37354


 55%|█████▌    | 1107/2000 [27:43<17:17,  1.16s/it]

Training_loss 17.39171


 55%|█████▌    | 1108/2000 [27:45<17:33,  1.18s/it]

Training_loss 17.39969


 55%|█████▌    | 1109/2000 [27:46<18:49,  1.27s/it]

Training_loss 17.42171


 56%|█████▌    | 1110/2000 [27:47<18:57,  1.28s/it]

Training_loss 17.42155


 56%|█████▌    | 1111/2000 [27:49<18:44,  1.27s/it]

Training_loss 17.42188


 56%|█████▌    | 1112/2000 [27:50<18:29,  1.25s/it]

Training_loss 17.41083


 56%|█████▌    | 1113/2000 [27:51<18:09,  1.23s/it]

Training_loss 17.46198


 56%|█████▌    | 1114/2000 [27:52<18:27,  1.25s/it]

Training_loss 17.48591


 56%|█████▌    | 1115/2000 [27:54<18:31,  1.26s/it]

Training_loss 17.48390


 56%|█████▌    | 1116/2000 [27:55<17:59,  1.22s/it]

Training_loss 17.47920


 56%|█████▌    | 1117/2000 [27:56<17:55,  1.22s/it]

Training_loss 17.45017


 56%|█████▌    | 1118/2000 [27:57<18:12,  1.24s/it]

Training_loss 17.44319


 56%|█████▌    | 1119/2000 [27:59<19:05,  1.30s/it]

Training_loss 17.41883


 56%|█████▌    | 1120/2000 [28:00<19:49,  1.35s/it]

Training_loss 17.45344


 56%|█████▌    | 1121/2000 [28:02<20:02,  1.37s/it]

Training_loss 17.44660


 56%|█████▌    | 1122/2000 [28:03<19:50,  1.36s/it]

Training_loss 17.46829


 56%|█████▌    | 1123/2000 [28:04<19:11,  1.31s/it]

Training_loss 17.48803


 56%|█████▌    | 1124/2000 [28:05<19:03,  1.31s/it]

Training_loss 17.46805


 56%|█████▋    | 1125/2000 [28:07<19:26,  1.33s/it]

Training_loss 17.48031


 56%|█████▋    | 1126/2000 [28:08<18:48,  1.29s/it]

Training_loss 17.46399


 56%|█████▋    | 1127/2000 [28:09<18:34,  1.28s/it]

Training_loss 17.44443


 56%|█████▋    | 1128/2000 [28:10<18:11,  1.25s/it]

Training_loss 17.45618


 56%|█████▋    | 1129/2000 [28:12<18:13,  1.26s/it]

Training_loss 17.46786


 56%|█████▋    | 1130/2000 [28:13<18:56,  1.31s/it]

Training_loss 17.50501


 57%|█████▋    | 1131/2000 [28:14<18:44,  1.29s/it]

Training_loss 17.51272


 57%|█████▋    | 1132/2000 [28:16<18:38,  1.29s/it]

Training_loss 17.49199


 57%|█████▋    | 1133/2000 [28:17<18:20,  1.27s/it]

Training_loss 17.49204


 57%|█████▋    | 1134/2000 [28:18<19:11,  1.33s/it]

Training_loss 17.55117


 57%|█████▋    | 1135/2000 [28:20<18:48,  1.30s/it]

Training_loss 17.56614


 57%|█████▋    | 1136/2000 [28:21<18:09,  1.26s/it]

Training_loss 17.57784


 57%|█████▋    | 1137/2000 [28:22<17:40,  1.23s/it]

Training_loss 17.61787


 57%|█████▋    | 1138/2000 [28:23<18:02,  1.26s/it]

Training_loss 17.61391


 57%|█████▋    | 1139/2000 [28:24<17:39,  1.23s/it]

Training_loss 17.58424


 57%|█████▋    | 1140/2000 [28:26<17:15,  1.20s/it]

Training_loss 17.57463


 57%|█████▋    | 1141/2000 [28:27<16:52,  1.18s/it]

Training_loss 17.57826


 57%|█████▋    | 1142/2000 [28:28<16:45,  1.17s/it]

Training_loss 17.59288


 57%|█████▋    | 1143/2000 [28:29<16:57,  1.19s/it]

Training_loss 17.58555


 57%|█████▋    | 1144/2000 [28:30<17:07,  1.20s/it]

Training_loss 17.61901


 57%|█████▋    | 1145/2000 [28:32<17:25,  1.22s/it]

Training_loss 17.61113


 57%|█████▋    | 1146/2000 [28:33<17:23,  1.22s/it]

Training_loss 17.60562


 57%|█████▋    | 1147/2000 [28:34<18:32,  1.30s/it]

Training_loss 17.57786


 57%|█████▋    | 1148/2000 [28:35<18:09,  1.28s/it]

Training_loss 17.60616


 57%|█████▋    | 1149/2000 [28:37<17:51,  1.26s/it]

Training_loss 17.61403


 57%|█████▊    | 1150/2000 [28:38<18:02,  1.27s/it]

Training_loss 17.65156


 58%|█████▊    | 1151/2000 [28:39<17:42,  1.25s/it]

Training_loss 17.67919


 58%|█████▊    | 1152/2000 [28:40<17:31,  1.24s/it]

Training_loss 17.71039


 58%|█████▊    | 1153/2000 [28:42<17:17,  1.22s/it]

Training_loss 17.72595


 58%|█████▊    | 1154/2000 [28:43<17:14,  1.22s/it]

Training_loss 17.73970


 58%|█████▊    | 1155/2000 [28:44<16:53,  1.20s/it]

Training_loss 17.71315


 58%|█████▊    | 1156/2000 [28:45<17:08,  1.22s/it]

Training_loss 17.74345


 58%|█████▊    | 1157/2000 [28:46<17:06,  1.22s/it]

Training_loss 17.74137


 58%|█████▊    | 1158/2000 [28:48<16:55,  1.21s/it]

Training_loss 17.71072


 58%|█████▊    | 1159/2000 [28:49<16:59,  1.21s/it]

Training_loss 17.69742


 58%|█████▊    | 1160/2000 [28:50<18:29,  1.32s/it]

Training_loss 17.66712


 58%|█████▊    | 1161/2000 [28:52<17:50,  1.28s/it]

Training_loss 17.71991


 58%|█████▊    | 1162/2000 [28:53<17:11,  1.23s/it]

Training_loss 17.73408


 58%|█████▊    | 1163/2000 [28:54<16:56,  1.21s/it]

Training_loss 17.74770


 58%|█████▊    | 1164/2000 [28:55<16:34,  1.19s/it]

Training_loss 17.76675


 58%|█████▊    | 1165/2000 [28:56<16:25,  1.18s/it]

Training_loss 17.73800


 58%|█████▊    | 1166/2000 [28:57<16:55,  1.22s/it]

Training_loss 17.73043


 58%|█████▊    | 1167/2000 [28:59<17:52,  1.29s/it]

Training_loss 17.68903


 58%|█████▊    | 1168/2000 [29:00<17:32,  1.27s/it]

Training_loss 17.68420


 58%|█████▊    | 1169/2000 [29:01<17:10,  1.24s/it]

Training_loss 17.68797


 58%|█████▊    | 1170/2000 [29:02<16:51,  1.22s/it]

Training_loss 17.67254


 59%|█████▊    | 1171/2000 [29:04<16:45,  1.21s/it]

Training_loss 17.67555


 59%|█████▊    | 1172/2000 [29:05<16:20,  1.18s/it]

Training_loss 17.63638


 59%|█████▊    | 1173/2000 [29:06<17:15,  1.25s/it]

Training_loss 17.63595


 59%|█████▊    | 1174/2000 [29:07<16:55,  1.23s/it]

Training_loss 17.61570


 59%|█████▉    | 1175/2000 [29:09<16:34,  1.21s/it]

Training_loss 17.60136


 59%|█████▉    | 1176/2000 [29:10<16:38,  1.21s/it]

Training_loss 17.61593


 59%|█████▉    | 1177/2000 [29:11<16:54,  1.23s/it]

Training_loss 17.64186


 59%|█████▉    | 1178/2000 [29:12<16:54,  1.23s/it]

Training_loss 17.63842


 59%|█████▉    | 1179/2000 [29:14<16:54,  1.24s/it]

Training_loss 17.60143


 59%|█████▉    | 1180/2000 [29:15<17:36,  1.29s/it]

Training_loss 17.63677


 59%|█████▉    | 1181/2000 [29:16<17:07,  1.25s/it]

Training_loss 17.65901


 59%|█████▉    | 1182/2000 [29:17<17:07,  1.26s/it]

Training_loss 17.63165


 59%|█████▉    | 1183/2000 [29:19<17:06,  1.26s/it]

Training_loss 17.67415


 59%|█████▉    | 1184/2000 [29:20<17:04,  1.26s/it]

Training_loss 17.67319


 59%|█████▉    | 1185/2000 [29:21<17:05,  1.26s/it]

Training_loss 17.65632


 59%|█████▉    | 1186/2000 [29:23<17:43,  1.31s/it]

Training_loss 17.63055


 59%|█████▉    | 1187/2000 [29:24<17:35,  1.30s/it]

Training_loss 17.65205


 59%|█████▉    | 1188/2000 [29:25<17:03,  1.26s/it]

Training_loss 17.65075


 59%|█████▉    | 1189/2000 [29:26<16:45,  1.24s/it]

Training_loss 17.69861


 60%|█████▉    | 1190/2000 [29:27<16:29,  1.22s/it]

Training_loss 17.71064


 60%|█████▉    | 1191/2000 [29:29<16:15,  1.21s/it]

Training_loss 17.71271


 60%|█████▉    | 1192/2000 [29:30<16:03,  1.19s/it]

Training_loss 17.71026


 60%|█████▉    | 1193/2000 [29:31<15:33,  1.16s/it]

Training_loss 17.69334


 60%|█████▉    | 1194/2000 [29:32<16:02,  1.19s/it]

Training_loss 17.67735


 60%|█████▉    | 1195/2000 [29:33<15:57,  1.19s/it]

Training_loss 17.72074


 60%|█████▉    | 1196/2000 [29:34<15:54,  1.19s/it]

Training_loss 17.71636


 60%|█████▉    | 1197/2000 [29:36<15:48,  1.18s/it]

Training_loss 17.68878


 60%|█████▉    | 1198/2000 [29:37<15:43,  1.18s/it]

Training_loss 17.70166


 60%|█████▉    | 1199/2000 [29:38<16:37,  1.25s/it]

Training_loss 17.68701


 60%|██████    | 1200/2000 [29:39<16:29,  1.24s/it]

Training_loss 17.67709


 60%|██████    | 1201/2000 [29:41<16:15,  1.22s/it]

Training_loss 17.70536


 60%|██████    | 1202/2000 [29:42<15:59,  1.20s/it]

Training_loss 17.68166


 60%|██████    | 1203/2000 [29:43<15:43,  1.18s/it]

Training_loss 17.66704


 60%|██████    | 1204/2000 [29:44<15:44,  1.19s/it]

Training_loss 17.65023


 60%|██████    | 1205/2000 [29:45<15:42,  1.19s/it]

Training_loss 17.64720


 60%|██████    | 1206/2000 [29:46<15:20,  1.16s/it]

Training_loss 17.66734


 60%|██████    | 1207/2000 [29:48<15:26,  1.17s/it]

Training_loss 17.70515


 60%|██████    | 1208/2000 [29:49<15:49,  1.20s/it]

Training_loss 17.67865


 60%|██████    | 1209/2000 [29:50<15:39,  1.19s/it]

Training_loss 17.68344


 60%|██████    | 1210/2000 [29:51<15:48,  1.20s/it]

Training_loss 17.66331


 61%|██████    | 1211/2000 [29:52<15:27,  1.18s/it]

Training_loss 17.69914


 61%|██████    | 1212/2000 [29:54<16:02,  1.22s/it]

Training_loss 17.70594


 61%|██████    | 1213/2000 [29:55<16:27,  1.25s/it]

Training_loss 17.72732


 61%|██████    | 1214/2000 [29:56<16:13,  1.24s/it]

Training_loss 17.71331


 61%|██████    | 1215/2000 [29:57<15:43,  1.20s/it]

Training_loss 17.72526


 61%|██████    | 1216/2000 [29:59<16:07,  1.23s/it]

Training_loss 17.73854


 61%|██████    | 1217/2000 [30:00<17:36,  1.35s/it]

Training_loss 17.76669


 61%|██████    | 1218/2000 [30:01<17:17,  1.33s/it]

Training_loss 17.77273


 61%|██████    | 1219/2000 [30:03<16:43,  1.29s/it]

Training_loss 17.78122


 61%|██████    | 1220/2000 [30:04<16:38,  1.28s/it]

Training_loss 17.74244


 61%|██████    | 1221/2000 [30:05<16:03,  1.24s/it]

Training_loss 17.72499


 61%|██████    | 1222/2000 [30:06<15:50,  1.22s/it]

Training_loss 17.74043


 61%|██████    | 1223/2000 [30:07<15:24,  1.19s/it]

Training_loss 17.70847


 61%|██████    | 1224/2000 [30:09<15:18,  1.18s/it]

Training_loss 17.70602


 61%|██████▏   | 1225/2000 [30:10<15:57,  1.23s/it]

Training_loss 17.70745


 61%|██████▏   | 1226/2000 [30:11<16:11,  1.26s/it]

Training_loss 17.72256


 61%|██████▏   | 1227/2000 [30:12<15:42,  1.22s/it]

Training_loss 17.71156


 61%|██████▏   | 1228/2000 [30:14<15:41,  1.22s/it]

Training_loss 17.73438


 61%|██████▏   | 1229/2000 [30:15<15:14,  1.19s/it]

Training_loss 17.77267


 62%|██████▏   | 1230/2000 [30:16<15:40,  1.22s/it]

Training_loss 17.79048


 62%|██████▏   | 1231/2000 [30:17<15:30,  1.21s/it]

Training_loss 17.76833


 62%|██████▏   | 1232/2000 [30:18<15:15,  1.19s/it]

Training_loss 17.78019


 62%|██████▏   | 1233/2000 [30:19<15:12,  1.19s/it]

Training_loss 17.79954


 62%|██████▏   | 1234/2000 [30:21<14:57,  1.17s/it]

Training_loss 17.76939


 62%|██████▏   | 1235/2000 [30:22<14:59,  1.18s/it]

Training_loss 17.79614


 62%|██████▏   | 1236/2000 [30:23<14:34,  1.14s/it]

Training_loss 17.80231


 62%|██████▏   | 1237/2000 [30:24<14:37,  1.15s/it]

Training_loss 17.79928


 62%|██████▏   | 1238/2000 [30:25<14:43,  1.16s/it]

Training_loss 17.82161


 62%|██████▏   | 1239/2000 [30:27<15:44,  1.24s/it]

Training_loss 17.79144


 62%|██████▏   | 1240/2000 [30:28<15:23,  1.22s/it]

Training_loss 17.78776


 62%|██████▏   | 1241/2000 [30:29<15:10,  1.20s/it]

Training_loss 17.80202


 62%|██████▏   | 1242/2000 [30:30<14:45,  1.17s/it]

Training_loss 17.75788


 62%|██████▏   | 1243/2000 [30:31<14:48,  1.17s/it]

Training_loss 17.72033


 62%|██████▏   | 1244/2000 [30:33<15:08,  1.20s/it]

Training_loss 17.71423


 62%|██████▏   | 1245/2000 [30:34<15:02,  1.20s/it]

Training_loss 17.72915


 62%|██████▏   | 1246/2000 [30:35<14:49,  1.18s/it]

Training_loss 17.72710


 62%|██████▏   | 1247/2000 [30:36<14:56,  1.19s/it]

Training_loss 17.71397


 62%|██████▏   | 1248/2000 [30:37<15:03,  1.20s/it]

Training_loss 17.70962


 62%|██████▏   | 1249/2000 [30:38<14:59,  1.20s/it]

Training_loss 17.69689


 62%|██████▎   | 1250/2000 [30:40<14:52,  1.19s/it]

Training_loss 17.71802


 63%|██████▎   | 1251/2000 [30:41<14:47,  1.18s/it]

Training_loss 17.74699


 63%|██████▎   | 1252/2000 [30:42<15:39,  1.26s/it]

Training_loss 17.74017


 63%|██████▎   | 1253/2000 [30:43<15:30,  1.24s/it]

Training_loss 17.78130


 63%|██████▎   | 1254/2000 [30:45<15:10,  1.22s/it]

Training_loss 17.79537


 63%|██████▎   | 1255/2000 [30:46<15:07,  1.22s/it]

Training_loss 17.78146


 63%|██████▎   | 1256/2000 [30:47<14:36,  1.18s/it]

Training_loss 17.78017


 63%|██████▎   | 1257/2000 [30:48<14:31,  1.17s/it]

Training_loss 17.80481


 63%|██████▎   | 1258/2000 [30:49<14:37,  1.18s/it]

Training_loss 17.75413


 63%|██████▎   | 1259/2000 [30:51<14:44,  1.19s/it]

Training_loss 17.79089


 63%|██████▎   | 1260/2000 [30:52<14:41,  1.19s/it]

Training_loss 17.83503


 63%|██████▎   | 1261/2000 [30:53<14:39,  1.19s/it]

Training_loss 17.83808


 63%|██████▎   | 1262/2000 [30:54<14:36,  1.19s/it]

Training_loss 17.85231


 63%|██████▎   | 1263/2000 [30:55<14:39,  1.19s/it]

Training_loss 17.86881


 63%|██████▎   | 1264/2000 [30:56<14:37,  1.19s/it]

Training_loss 17.90731


 63%|██████▎   | 1265/2000 [30:58<15:21,  1.25s/it]

Training_loss 17.91442


 63%|██████▎   | 1266/2000 [30:59<16:22,  1.34s/it]

Training_loss 17.93415


 63%|██████▎   | 1267/2000 [31:01<15:48,  1.29s/it]

Training_loss 17.95582


 63%|██████▎   | 1268/2000 [31:02<15:40,  1.28s/it]

Training_loss 17.98575


 63%|██████▎   | 1269/2000 [31:03<15:52,  1.30s/it]

Training_loss 18.01897


 64%|██████▎   | 1270/2000 [31:05<16:42,  1.37s/it]

Training_loss 18.01589


 64%|██████▎   | 1271/2000 [31:06<16:37,  1.37s/it]

Training_loss 17.99885


 64%|██████▎   | 1272/2000 [31:07<16:31,  1.36s/it]

Training_loss 18.03530


 64%|██████▎   | 1273/2000 [31:09<15:51,  1.31s/it]

Training_loss 18.01170


 64%|██████▎   | 1274/2000 [31:10<15:22,  1.27s/it]

Training_loss 17.96226


 64%|██████▍   | 1275/2000 [31:11<16:25,  1.36s/it]

Training_loss 17.97645


 64%|██████▍   | 1276/2000 [31:14<19:37,  1.63s/it]

Training_loss 17.95735


 64%|██████▍   | 1277/2000 [31:15<19:28,  1.62s/it]

Training_loss 17.97213


 64%|██████▍   | 1278/2000 [31:16<17:57,  1.49s/it]

Training_loss 17.95500


 64%|██████▍   | 1279/2000 [31:17<16:08,  1.34s/it]

Training_loss 18.00106


 64%|██████▍   | 1280/2000 [31:18<14:43,  1.23s/it]

Training_loss 17.97620


 64%|██████▍   | 1281/2000 [31:19<13:52,  1.16s/it]

Training_loss 17.98642


 64%|██████▍   | 1282/2000 [31:20<12:52,  1.08s/it]

Training_loss 17.96834


 64%|██████▍   | 1283/2000 [31:21<12:23,  1.04s/it]

Training_loss 17.99161


 64%|██████▍   | 1284/2000 [31:22<11:53,  1.00it/s]

Training_loss 17.98696


 64%|██████▍   | 1285/2000 [31:23<11:29,  1.04it/s]

Training_loss 18.02258


 64%|██████▍   | 1286/2000 [31:24<11:28,  1.04it/s]

Training_loss 18.01121


 64%|██████▍   | 1287/2000 [31:25<11:46,  1.01it/s]

Training_loss 17.98358


 64%|██████▍   | 1288/2000 [31:26<12:33,  1.06s/it]

Training_loss 17.99742


 64%|██████▍   | 1289/2000 [31:27<12:41,  1.07s/it]

Training_loss 17.98778


 64%|██████▍   | 1290/2000 [31:28<12:37,  1.07s/it]

Training_loss 17.98829


 65%|██████▍   | 1291/2000 [31:29<12:37,  1.07s/it]

Training_loss 17.97351


 65%|██████▍   | 1292/2000 [31:31<12:48,  1.09s/it]

Training_loss 17.97407


 65%|██████▍   | 1293/2000 [31:32<12:55,  1.10s/it]

Training_loss 17.95077


 65%|██████▍   | 1294/2000 [31:33<13:05,  1.11s/it]

Training_loss 17.93533


 65%|██████▍   | 1295/2000 [31:34<12:42,  1.08s/it]

Training_loss 17.94664


 65%|██████▍   | 1296/2000 [31:35<12:36,  1.07s/it]

Training_loss 17.90776


 65%|██████▍   | 1297/2000 [31:36<12:04,  1.03s/it]

Training_loss 17.95771


 65%|██████▍   | 1298/2000 [31:37<11:45,  1.01s/it]

Training_loss 17.93857


 65%|██████▍   | 1299/2000 [31:38<11:40,  1.00it/s]

Training_loss 17.95440


 65%|██████▌   | 1300/2000 [31:39<11:35,  1.01it/s]

Training_loss 17.90309


 65%|██████▌   | 1301/2000 [31:40<11:34,  1.01it/s]

Training_loss 17.88212


 65%|██████▌   | 1302/2000 [31:41<11:38,  1.00s/it]

Training_loss 17.90878


 65%|██████▌   | 1303/2000 [31:42<11:38,  1.00s/it]

Training_loss 17.92412


 65%|██████▌   | 1304/2000 [31:43<11:42,  1.01s/it]

Training_loss 17.92201


 65%|██████▌   | 1305/2000 [31:44<11:32,  1.00it/s]

Training_loss 17.92950


 65%|██████▌   | 1306/2000 [31:45<11:20,  1.02it/s]

Training_loss 17.90575


 65%|██████▌   | 1307/2000 [31:46<12:17,  1.06s/it]

Training_loss 17.92570


 65%|██████▌   | 1308/2000 [31:47<12:33,  1.09s/it]

Training_loss 17.94581


 65%|██████▌   | 1309/2000 [31:48<12:01,  1.04s/it]

Training_loss 17.93771


 66%|██████▌   | 1310/2000 [31:49<11:51,  1.03s/it]

Training_loss 17.95938


 66%|██████▌   | 1311/2000 [31:50<11:30,  1.00s/it]

Training_loss 17.97732


 66%|██████▌   | 1312/2000 [31:51<11:34,  1.01s/it]

Training_loss 17.99966


 66%|██████▌   | 1313/2000 [31:52<12:05,  1.06s/it]

Training_loss 18.05158


 66%|██████▌   | 1314/2000 [31:53<11:43,  1.03s/it]

Training_loss 18.07063


 66%|██████▌   | 1315/2000 [31:54<11:27,  1.00s/it]

Training_loss 18.04509


 66%|██████▌   | 1316/2000 [31:55<11:20,  1.00it/s]

Training_loss 18.06762


 66%|██████▌   | 1317/2000 [31:56<11:12,  1.01it/s]

Training_loss 18.06594


 66%|██████▌   | 1318/2000 [31:57<10:56,  1.04it/s]

Training_loss 18.09265


 66%|██████▌   | 1319/2000 [31:58<11:11,  1.01it/s]

Training_loss 18.07686


 66%|██████▌   | 1320/2000 [31:59<12:25,  1.10s/it]

Training_loss 18.11130


 66%|██████▌   | 1321/2000 [32:01<13:16,  1.17s/it]

Training_loss 18.10256


 66%|██████▌   | 1322/2000 [32:02<13:37,  1.21s/it]

Training_loss 18.07617


 66%|██████▌   | 1323/2000 [32:03<13:17,  1.18s/it]

Training_loss 18.11986


 66%|██████▌   | 1324/2000 [32:04<12:50,  1.14s/it]

Training_loss 18.13640


 66%|██████▋   | 1325/2000 [32:05<12:18,  1.09s/it]

Training_loss 18.14606


 66%|██████▋   | 1326/2000 [32:06<11:58,  1.07s/it]

Training_loss 18.11689


 66%|██████▋   | 1327/2000 [32:07<11:33,  1.03s/it]

Training_loss 18.10645


 66%|██████▋   | 1328/2000 [32:08<11:14,  1.00s/it]

Training_loss 18.12419


 66%|██████▋   | 1329/2000 [32:09<11:01,  1.01it/s]

Training_loss 18.14614


 66%|██████▋   | 1330/2000 [32:10<11:09,  1.00it/s]

Training_loss 18.16645


 67%|██████▋   | 1331/2000 [32:11<10:56,  1.02it/s]

Training_loss 18.20415


 67%|██████▋   | 1332/2000 [32:12<10:43,  1.04it/s]

Training_loss 18.17411


 67%|██████▋   | 1333/2000 [32:13<11:01,  1.01it/s]

Training_loss 18.15885


 67%|██████▋   | 1334/2000 [32:14<11:18,  1.02s/it]

Training_loss 18.19202


 67%|██████▋   | 1335/2000 [32:15<11:45,  1.06s/it]

Training_loss 18.19265


 67%|██████▋   | 1336/2000 [32:16<11:45,  1.06s/it]

Training_loss 18.21541


 67%|██████▋   | 1337/2000 [32:17<12:14,  1.11s/it]

Training_loss 18.18736


 67%|██████▋   | 1338/2000 [32:19<12:29,  1.13s/it]

Training_loss 18.19130


 67%|██████▋   | 1339/2000 [32:20<12:32,  1.14s/it]

Training_loss 18.23251


 67%|██████▋   | 1340/2000 [32:21<12:04,  1.10s/it]

Training_loss 18.26769


 67%|██████▋   | 1341/2000 [32:22<11:31,  1.05s/it]

Training_loss 18.27922


 67%|██████▋   | 1342/2000 [32:23<11:22,  1.04s/it]

Training_loss 18.28001


 67%|██████▋   | 1343/2000 [32:24<10:51,  1.01it/s]

Training_loss 18.29024


 67%|██████▋   | 1344/2000 [32:25<10:49,  1.01it/s]

Training_loss 18.28765


 67%|██████▋   | 1345/2000 [32:25<10:29,  1.04it/s]

Training_loss 18.28995


 67%|██████▋   | 1346/2000 [32:26<10:14,  1.06it/s]

Training_loss 18.29614


 67%|██████▋   | 1347/2000 [32:27<10:17,  1.06it/s]

Training_loss 18.26712


 67%|██████▋   | 1348/2000 [32:28<10:31,  1.03it/s]

Training_loss 18.31138


 67%|██████▋   | 1349/2000 [32:29<10:32,  1.03it/s]

Training_loss 18.29915


 68%|██████▊   | 1350/2000 [32:30<10:44,  1.01it/s]

Training_loss 18.29650


 68%|██████▊   | 1351/2000 [32:31<10:41,  1.01it/s]

Training_loss 18.23364


 68%|██████▊   | 1352/2000 [32:32<10:41,  1.01it/s]

Training_loss 18.22938


 68%|██████▊   | 1353/2000 [32:33<10:55,  1.01s/it]

Training_loss 18.23868


 68%|██████▊   | 1354/2000 [32:35<11:45,  1.09s/it]

Training_loss 18.22989


 68%|██████▊   | 1355/2000 [32:36<11:33,  1.08s/it]

Training_loss 18.25963


 68%|██████▊   | 1356/2000 [32:37<11:16,  1.05s/it]

Training_loss 18.30238


 68%|██████▊   | 1357/2000 [32:38<11:01,  1.03s/it]

Training_loss 18.30743


 68%|██████▊   | 1358/2000 [32:39<10:57,  1.02s/it]

Training_loss 18.31931


 68%|██████▊   | 1359/2000 [32:40<11:02,  1.03s/it]

Training_loss 18.30637


 68%|██████▊   | 1360/2000 [32:41<11:16,  1.06s/it]

Training_loss 18.31508


 68%|██████▊   | 1361/2000 [32:42<11:03,  1.04s/it]

Training_loss 18.29635


 68%|██████▊   | 1362/2000 [32:43<11:09,  1.05s/it]

Training_loss 18.29029


 68%|██████▊   | 1363/2000 [32:44<11:25,  1.08s/it]

Training_loss 18.30276


 68%|██████▊   | 1364/2000 [32:45<11:24,  1.08s/it]

Training_loss 18.29251


 68%|██████▊   | 1365/2000 [32:46<11:04,  1.05s/it]

Training_loss 18.27054


 68%|██████▊   | 1366/2000 [32:47<10:41,  1.01s/it]

Training_loss 18.24706


 68%|██████▊   | 1367/2000 [32:48<10:48,  1.03s/it]

Training_loss 18.25771


 68%|██████▊   | 1368/2000 [32:49<10:52,  1.03s/it]

Training_loss 18.30061


 68%|██████▊   | 1369/2000 [32:50<11:24,  1.08s/it]

Training_loss 18.27258


 68%|██████▊   | 1370/2000 [32:51<11:25,  1.09s/it]

Training_loss 18.28851


 69%|██████▊   | 1371/2000 [32:52<11:10,  1.07s/it]

Training_loss 18.25055


 69%|██████▊   | 1372/2000 [32:54<11:09,  1.07s/it]

Training_loss 18.24997


 69%|██████▊   | 1373/2000 [32:54<10:39,  1.02s/it]

Training_loss 18.27807


 69%|██████▊   | 1374/2000 [32:55<10:22,  1.01it/s]

Training_loss 18.31264


 69%|██████▉   | 1375/2000 [32:56<10:22,  1.00it/s]

Training_loss 18.28405


 69%|██████▉   | 1376/2000 [32:57<10:07,  1.03it/s]

Training_loss 18.32449


 69%|██████▉   | 1377/2000 [32:58<10:08,  1.02it/s]

Training_loss 18.29357


 69%|██████▉   | 1378/2000 [32:59<09:56,  1.04it/s]

Training_loss 18.26486


 69%|██████▉   | 1379/2000 [33:00<10:02,  1.03it/s]

Training_loss 18.26577


 69%|██████▉   | 1380/2000 [33:01<10:12,  1.01it/s]

Training_loss 18.28674


 69%|██████▉   | 1381/2000 [33:02<10:10,  1.01it/s]

Training_loss 18.33326


 69%|██████▉   | 1382/2000 [33:03<10:46,  1.05s/it]

Training_loss 18.33014


 69%|██████▉   | 1383/2000 [33:05<11:27,  1.11s/it]

Training_loss 18.33170


 69%|██████▉   | 1384/2000 [33:07<15:11,  1.48s/it]

Training_loss 18.36282


 69%|██████▉   | 1385/2000 [33:09<16:53,  1.65s/it]

Training_loss 18.31872


 69%|██████▉   | 1386/2000 [33:11<17:51,  1.75s/it]

Training_loss 18.31750


 69%|██████▉   | 1387/2000 [33:13<17:05,  1.67s/it]

Training_loss 18.31041


 69%|██████▉   | 1388/2000 [33:14<16:23,  1.61s/it]

Training_loss 18.29312


 69%|██████▉   | 1389/2000 [33:16<17:08,  1.68s/it]

Training_loss 18.25385


 70%|██████▉   | 1390/2000 [33:17<15:33,  1.53s/it]

Training_loss 18.25023


 70%|██████▉   | 1391/2000 [33:18<14:11,  1.40s/it]

Training_loss 18.27051


 70%|██████▉   | 1392/2000 [33:19<13:25,  1.33s/it]

Training_loss 18.27308


 70%|██████▉   | 1393/2000 [33:20<12:54,  1.28s/it]

Training_loss 18.26665


 70%|██████▉   | 1394/2000 [33:22<12:31,  1.24s/it]

Training_loss 18.26956


 70%|██████▉   | 1395/2000 [33:23<12:26,  1.23s/it]

Training_loss 18.28716


 70%|██████▉   | 1396/2000 [33:24<11:57,  1.19s/it]

Training_loss 18.27826


 70%|██████▉   | 1397/2000 [33:25<11:18,  1.13s/it]

Training_loss 18.26604


 70%|██████▉   | 1398/2000 [33:26<10:49,  1.08s/it]

Training_loss 18.27296


 70%|██████▉   | 1399/2000 [33:27<10:32,  1.05s/it]

Training_loss 18.27268


 70%|███████   | 1400/2000 [33:28<10:28,  1.05s/it]

Training_loss 18.24292


 70%|███████   | 1401/2000 [33:29<10:34,  1.06s/it]

Training_loss 18.25636


 70%|███████   | 1402/2000 [33:30<10:36,  1.06s/it]

Training_loss 18.24146


 70%|███████   | 1403/2000 [33:31<10:06,  1.02s/it]

Training_loss 18.22862


 70%|███████   | 1404/2000 [33:32<09:54,  1.00it/s]

Training_loss 18.20865


 70%|███████   | 1405/2000 [33:33<09:49,  1.01it/s]

Training_loss 18.24393


 70%|███████   | 1406/2000 [33:34<09:46,  1.01it/s]

Training_loss 18.23062


 70%|███████   | 1407/2000 [33:35<10:05,  1.02s/it]

Training_loss 18.27504


 70%|███████   | 1408/2000 [33:36<09:39,  1.02it/s]

Training_loss 18.27069


 70%|███████   | 1409/2000 [33:37<09:39,  1.02it/s]

Training_loss 18.25974


 70%|███████   | 1410/2000 [33:38<09:48,  1.00it/s]

Training_loss 18.25385


 71%|███████   | 1411/2000 [33:39<10:06,  1.03s/it]

Training_loss 18.26310


 71%|███████   | 1412/2000 [33:40<09:57,  1.02s/it]

Training_loss 18.27975


 71%|███████   | 1413/2000 [33:41<09:46,  1.00it/s]

Training_loss 18.28754


 71%|███████   | 1414/2000 [33:42<09:42,  1.01it/s]

Training_loss 18.28610


 71%|███████   | 1415/2000 [33:43<09:35,  1.02it/s]

Training_loss 18.31705


 71%|███████   | 1416/2000 [33:44<09:31,  1.02it/s]

Training_loss 18.31444


 71%|███████   | 1417/2000 [33:45<09:34,  1.01it/s]

Training_loss 18.33423


 71%|███████   | 1418/2000 [33:46<09:32,  1.02it/s]

Training_loss 18.34810


 71%|███████   | 1419/2000 [33:47<09:26,  1.03it/s]

Training_loss 18.32205


 71%|███████   | 1420/2000 [33:48<09:27,  1.02it/s]

Training_loss 18.32642


 71%|███████   | 1421/2000 [33:49<09:26,  1.02it/s]

Training_loss 18.29515


 71%|███████   | 1422/2000 [33:50<09:25,  1.02it/s]

Training_loss 18.28847


 71%|███████   | 1423/2000 [33:51<09:21,  1.03it/s]

Training_loss 18.27293


 71%|███████   | 1424/2000 [33:52<09:23,  1.02it/s]

Training_loss 18.29271


 71%|███████▏  | 1425/2000 [33:53<09:23,  1.02it/s]

Training_loss 18.33348


 71%|███████▏  | 1426/2000 [33:54<09:41,  1.01s/it]

Training_loss 18.34574


 71%|███████▏  | 1427/2000 [33:55<10:17,  1.08s/it]

Training_loss 18.34232


 71%|███████▏  | 1428/2000 [33:56<10:13,  1.07s/it]

Training_loss 18.32008


 71%|███████▏  | 1429/2000 [33:57<09:56,  1.04s/it]

Training_loss 18.28589


 72%|███████▏  | 1430/2000 [33:58<09:46,  1.03s/it]

Training_loss 18.29843


 72%|███████▏  | 1431/2000 [33:59<09:57,  1.05s/it]

Training_loss 18.27615


 72%|███████▏  | 1432/2000 [34:00<10:09,  1.07s/it]

Training_loss 18.29173


 72%|███████▏  | 1433/2000 [34:01<09:53,  1.05s/it]

Training_loss 18.29504


 72%|███████▏  | 1434/2000 [34:02<09:58,  1.06s/it]

Training_loss 18.31223


 72%|███████▏  | 1435/2000 [34:03<09:59,  1.06s/it]

Training_loss 18.32879


 72%|███████▏  | 1436/2000 [34:04<09:35,  1.02s/it]

Training_loss 18.30634


 72%|███████▏  | 1437/2000 [34:05<09:20,  1.01it/s]

Training_loss 18.29532


 72%|███████▏  | 1438/2000 [34:06<09:12,  1.02it/s]

Training_loss 18.27625


 72%|███████▏  | 1439/2000 [34:07<09:03,  1.03it/s]

Training_loss 18.26434


 72%|███████▏  | 1440/2000 [34:08<09:10,  1.02it/s]

Training_loss 18.27332


 72%|███████▏  | 1441/2000 [34:09<09:09,  1.02it/s]

Training_loss 18.26502


 72%|███████▏  | 1442/2000 [34:10<09:58,  1.07s/it]

Training_loss 18.24329


 72%|███████▏  | 1443/2000 [34:11<09:36,  1.04s/it]

Training_loss 18.20260


 72%|███████▏  | 1444/2000 [34:12<09:31,  1.03s/it]

Training_loss 18.19543


 72%|███████▏  | 1445/2000 [34:13<09:21,  1.01s/it]

Training_loss 18.12936


 72%|███████▏  | 1446/2000 [34:14<09:29,  1.03s/it]

Training_loss 18.11225


 72%|███████▏  | 1447/2000 [34:15<09:18,  1.01s/it]

Training_loss 18.10249


 72%|███████▏  | 1448/2000 [34:16<09:07,  1.01it/s]

Training_loss 18.16038


 72%|███████▏  | 1449/2000 [34:17<09:10,  1.00it/s]

Training_loss 18.14745


 72%|███████▎  | 1450/2000 [34:19<09:46,  1.07s/it]

Training_loss 18.13588


 73%|███████▎  | 1451/2000 [34:20<10:00,  1.09s/it]

Training_loss 18.13834


 73%|███████▎  | 1452/2000 [34:21<09:38,  1.06s/it]

Training_loss 18.17598


 73%|███████▎  | 1453/2000 [34:22<09:39,  1.06s/it]

Training_loss 18.22707


 73%|███████▎  | 1454/2000 [34:23<09:35,  1.05s/it]

Training_loss 18.23222


 73%|███████▎  | 1455/2000 [34:24<09:16,  1.02s/it]

Training_loss 18.21112


 73%|███████▎  | 1456/2000 [34:25<09:23,  1.04s/it]

Training_loss 18.23257


 73%|███████▎  | 1457/2000 [34:26<09:48,  1.08s/it]

Training_loss 18.19752


 73%|███████▎  | 1458/2000 [34:27<09:39,  1.07s/it]

Training_loss 18.21578


 73%|███████▎  | 1459/2000 [34:28<09:30,  1.05s/it]

Training_loss 18.21117


 73%|███████▎  | 1460/2000 [34:29<09:17,  1.03s/it]

Training_loss 18.22346


 73%|███████▎  | 1461/2000 [34:30<09:16,  1.03s/it]

Training_loss 18.18722


 73%|███████▎  | 1462/2000 [34:31<09:11,  1.02s/it]

Training_loss 18.15349


 73%|███████▎  | 1463/2000 [34:32<09:10,  1.02s/it]

Training_loss 18.18216


 73%|███████▎  | 1464/2000 [34:33<08:59,  1.01s/it]

Training_loss 18.22625


 73%|███████▎  | 1465/2000 [34:34<08:54,  1.00it/s]

Training_loss 18.19606


 73%|███████▎  | 1466/2000 [34:35<08:46,  1.01it/s]

Training_loss 18.21355


 73%|███████▎  | 1467/2000 [34:36<08:30,  1.04it/s]

Training_loss 18.24608


 73%|███████▎  | 1468/2000 [34:37<08:45,  1.01it/s]

Training_loss 18.22529


 73%|███████▎  | 1469/2000 [34:38<08:37,  1.03it/s]

Training_loss 18.23960


 74%|███████▎  | 1470/2000 [34:39<08:36,  1.03it/s]

Training_loss 18.28562


 74%|███████▎  | 1471/2000 [34:40<08:27,  1.04it/s]

Training_loss 18.34314


 74%|███████▎  | 1472/2000 [34:41<08:30,  1.03it/s]

Training_loss 18.32653


 74%|███████▎  | 1473/2000 [34:42<08:56,  1.02s/it]

Training_loss 18.24205


 74%|███████▎  | 1474/2000 [34:43<09:11,  1.05s/it]

Training_loss 18.26227


 74%|███████▍  | 1475/2000 [34:44<08:59,  1.03s/it]

Training_loss 18.23477


 74%|███████▍  | 1476/2000 [34:45<08:57,  1.03s/it]

Training_loss 18.21237


 74%|███████▍  | 1477/2000 [34:46<08:27,  1.03it/s]

Training_loss 18.21681


 74%|███████▍  | 1478/2000 [34:47<08:33,  1.02it/s]

Training_loss 18.22458


 74%|███████▍  | 1479/2000 [34:48<08:31,  1.02it/s]

Training_loss 18.22511


 74%|███████▍  | 1480/2000 [34:49<08:24,  1.03it/s]

Training_loss 18.22034


 74%|███████▍  | 1481/2000 [34:50<08:40,  1.00s/it]

Training_loss 18.23912


 74%|███████▍  | 1482/2000 [34:51<08:33,  1.01it/s]

Training_loss 18.29513


 74%|███████▍  | 1483/2000 [34:52<08:32,  1.01it/s]

Training_loss 18.30117


 74%|███████▍  | 1484/2000 [34:53<08:39,  1.01s/it]

Training_loss 18.32056


 74%|███████▍  | 1485/2000 [34:54<08:19,  1.03it/s]

Training_loss 18.36005


 74%|███████▍  | 1486/2000 [34:55<08:20,  1.03it/s]

Training_loss 18.35312


 74%|███████▍  | 1487/2000 [34:56<08:17,  1.03it/s]

Training_loss 18.32304


 74%|███████▍  | 1488/2000 [34:57<08:20,  1.02it/s]

Training_loss 18.32503


 74%|███████▍  | 1489/2000 [34:58<09:00,  1.06s/it]

Training_loss 18.32787


 74%|███████▍  | 1490/2000 [34:59<09:24,  1.11s/it]

Training_loss 18.38060


 75%|███████▍  | 1491/2000 [35:00<09:05,  1.07s/it]

Training_loss 18.44106


 75%|███████▍  | 1492/2000 [35:01<08:48,  1.04s/it]

Training_loss 18.42102


 75%|███████▍  | 1493/2000 [35:02<08:37,  1.02s/it]

Training_loss 18.41585


 75%|███████▍  | 1494/2000 [35:03<08:44,  1.04s/it]

Training_loss 18.40812


 75%|███████▍  | 1495/2000 [35:04<08:37,  1.02s/it]

Training_loss 18.40266


 75%|███████▍  | 1496/2000 [35:05<08:24,  1.00s/it]

Training_loss 18.41533


 75%|███████▍  | 1497/2000 [35:06<08:13,  1.02it/s]

Training_loss 18.44858


 75%|███████▍  | 1498/2000 [35:07<08:06,  1.03it/s]

Training_loss 18.46529


 75%|███████▍  | 1499/2000 [35:08<08:06,  1.03it/s]

Training_loss 18.47411


 75%|███████▌  | 1500/2000 [35:09<08:05,  1.03it/s]

Training_loss 18.52340


 75%|███████▌  | 1501/2000 [35:10<07:57,  1.04it/s]

Training_loss 18.55819


 75%|███████▌  | 1502/2000 [35:11<07:59,  1.04it/s]

Training_loss 18.57305


 75%|███████▌  | 1503/2000 [35:12<08:02,  1.03it/s]

Training_loss 18.59797


 75%|███████▌  | 1504/2000 [35:13<08:00,  1.03it/s]

Training_loss 18.55947


 75%|███████▌  | 1505/2000 [35:14<08:40,  1.05s/it]

Training_loss 18.58035


 75%|███████▌  | 1506/2000 [35:15<08:37,  1.05s/it]

Training_loss 18.59251


 75%|███████▌  | 1507/2000 [35:16<08:25,  1.03s/it]

Training_loss 18.60692


 75%|███████▌  | 1508/2000 [35:17<08:16,  1.01s/it]

Training_loss 18.58879


 75%|███████▌  | 1509/2000 [35:18<08:16,  1.01s/it]

Training_loss 18.57125


 76%|███████▌  | 1510/2000 [35:19<08:15,  1.01s/it]

Training_loss 18.57505


 76%|███████▌  | 1511/2000 [35:20<08:09,  1.00s/it]

Training_loss 18.60188


 76%|███████▌  | 1512/2000 [35:21<08:07,  1.00it/s]

Training_loss 18.56840


 76%|███████▌  | 1513/2000 [35:22<07:57,  1.02it/s]

Training_loss 18.52712


 76%|███████▌  | 1514/2000 [35:23<07:52,  1.03it/s]

Training_loss 18.48685


 76%|███████▌  | 1515/2000 [35:24<07:45,  1.04it/s]

Training_loss 18.44227


 76%|███████▌  | 1516/2000 [35:25<08:10,  1.01s/it]

Training_loss 18.41764


 76%|███████▌  | 1517/2000 [35:26<07:59,  1.01it/s]

Training_loss 18.44000


 76%|███████▌  | 1518/2000 [35:27<07:51,  1.02it/s]

Training_loss 18.48497


 76%|███████▌  | 1519/2000 [35:28<07:40,  1.04it/s]

Training_loss 18.45781


 76%|███████▌  | 1520/2000 [35:29<07:51,  1.02it/s]

Training_loss 18.47941


 76%|███████▌  | 1521/2000 [35:30<08:40,  1.09s/it]

Training_loss 18.47346


 76%|███████▌  | 1522/2000 [35:31<08:53,  1.12s/it]

Training_loss 18.48504


 76%|███████▌  | 1523/2000 [35:32<08:43,  1.10s/it]

Training_loss 18.49378


 76%|███████▌  | 1524/2000 [35:33<08:44,  1.10s/it]

Training_loss 18.50778


 76%|███████▋  | 1525/2000 [35:34<08:17,  1.05s/it]

Training_loss 18.54151


 76%|███████▋  | 1526/2000 [35:35<08:23,  1.06s/it]

Training_loss 18.53070


 76%|███████▋  | 1527/2000 [35:36<08:08,  1.03s/it]

Training_loss 18.55468


 76%|███████▋  | 1528/2000 [35:38<08:12,  1.04s/it]

Training_loss 18.52349


 76%|███████▋  | 1529/2000 [35:39<08:19,  1.06s/it]

Training_loss 18.53277


 76%|███████▋  | 1530/2000 [35:40<08:06,  1.04s/it]

Training_loss 18.57023


 77%|███████▋  | 1531/2000 [35:41<08:01,  1.03s/it]

Training_loss 18.61843


 77%|███████▋  | 1532/2000 [35:42<07:46,  1.00it/s]

Training_loss 18.62684


 77%|███████▋  | 1533/2000 [35:43<07:43,  1.01it/s]

Training_loss 18.59223


 77%|███████▋  | 1534/2000 [35:44<07:45,  1.00it/s]

Training_loss 18.56384


 77%|███████▋  | 1535/2000 [35:45<07:44,  1.00it/s]

Training_loss 18.57035


 77%|███████▋  | 1536/2000 [35:46<08:02,  1.04s/it]

Training_loss 18.56273


 77%|███████▋  | 1537/2000 [35:47<08:12,  1.06s/it]

Training_loss 18.55209


 77%|███████▋  | 1538/2000 [35:48<08:04,  1.05s/it]

Training_loss 18.61561


 77%|███████▋  | 1539/2000 [35:49<07:53,  1.03s/it]

Training_loss 18.65404


 77%|███████▋  | 1540/2000 [35:50<07:54,  1.03s/it]

Training_loss 18.68555


 77%|███████▋  | 1541/2000 [35:51<07:35,  1.01it/s]

Training_loss 18.69349


 77%|███████▋  | 1542/2000 [35:52<07:31,  1.02it/s]

Training_loss 18.69530


 77%|███████▋  | 1543/2000 [35:53<07:54,  1.04s/it]

Training_loss 18.72035


 77%|███████▋  | 1544/2000 [35:54<08:21,  1.10s/it]

Training_loss 18.67823


 77%|███████▋  | 1545/2000 [35:55<08:34,  1.13s/it]

Training_loss 18.67265


 77%|███████▋  | 1546/2000 [35:56<08:40,  1.15s/it]

Training_loss 18.67897


 77%|███████▋  | 1547/2000 [35:58<08:52,  1.18s/it]

Training_loss 18.66972


 77%|███████▋  | 1548/2000 [35:59<09:56,  1.32s/it]

Training_loss 18.68926


 77%|███████▋  | 1549/2000 [36:01<10:33,  1.40s/it]

Training_loss 18.75526


 78%|███████▊  | 1550/2000 [36:03<11:08,  1.48s/it]

Training_loss 18.77814


 78%|███████▊  | 1551/2000 [36:04<09:45,  1.30s/it]

Training_loss 18.82176


 78%|███████▊  | 1552/2000 [36:04<08:50,  1.18s/it]

Training_loss 18.82584


 78%|███████▊  | 1553/2000 [36:05<08:24,  1.13s/it]

Training_loss 18.86212


 78%|███████▊  | 1554/2000 [36:07<08:16,  1.11s/it]

Training_loss 18.84843


 78%|███████▊  | 1555/2000 [36:08<08:46,  1.18s/it]

Training_loss 18.86071


 78%|███████▊  | 1556/2000 [36:09<09:01,  1.22s/it]

Training_loss 18.85028


 78%|███████▊  | 1557/2000 [36:10<08:48,  1.19s/it]

Training_loss 18.83185


 78%|███████▊  | 1558/2000 [36:11<08:09,  1.11s/it]

Training_loss 18.80941


 78%|███████▊  | 1559/2000 [36:12<07:48,  1.06s/it]

Training_loss 18.81746


 78%|███████▊  | 1560/2000 [36:13<08:20,  1.14s/it]

Training_loss 18.78839


 78%|███████▊  | 1561/2000 [36:15<08:11,  1.12s/it]

Training_loss 18.79359


 78%|███████▊  | 1562/2000 [36:16<07:57,  1.09s/it]

Training_loss 18.78022


 78%|███████▊  | 1563/2000 [36:17<07:38,  1.05s/it]

Training_loss 18.76622


 78%|███████▊  | 1564/2000 [36:18<07:45,  1.07s/it]

Training_loss 18.73268


 78%|███████▊  | 1565/2000 [36:19<08:17,  1.14s/it]

Training_loss 18.79069


 78%|███████▊  | 1566/2000 [36:20<08:06,  1.12s/it]

Training_loss 18.77687


 78%|███████▊  | 1567/2000 [36:21<07:42,  1.07s/it]

Training_loss 18.77097


 78%|███████▊  | 1568/2000 [36:22<07:50,  1.09s/it]

Training_loss 18.78728


 78%|███████▊  | 1569/2000 [36:23<07:34,  1.05s/it]

Training_loss 18.80894


 78%|███████▊  | 1570/2000 [36:24<07:32,  1.05s/it]

Training_loss 18.86426


 79%|███████▊  | 1571/2000 [36:25<07:27,  1.04s/it]

Training_loss 18.87930


 79%|███████▊  | 1572/2000 [36:26<07:23,  1.04s/it]

Training_loss 18.89208


 79%|███████▊  | 1573/2000 [36:27<07:17,  1.03s/it]

Training_loss 18.83628


 79%|███████▊  | 1574/2000 [36:28<07:03,  1.01it/s]

Training_loss 18.83345


 79%|███████▉  | 1575/2000 [36:29<07:09,  1.01s/it]

Training_loss 18.82375


 79%|███████▉  | 1576/2000 [36:30<07:05,  1.00s/it]

Training_loss 18.84132


 79%|███████▉  | 1577/2000 [36:31<07:10,  1.02s/it]

Training_loss 18.79595


 79%|███████▉  | 1578/2000 [36:32<07:07,  1.01s/it]

Training_loss 18.81018


 79%|███████▉  | 1579/2000 [36:33<07:27,  1.06s/it]

Training_loss 18.79748


 79%|███████▉  | 1580/2000 [36:34<07:29,  1.07s/it]

Training_loss 18.78593


 79%|███████▉  | 1581/2000 [36:35<07:22,  1.05s/it]

Training_loss 18.76994


 79%|███████▉  | 1582/2000 [36:36<07:11,  1.03s/it]

Training_loss 18.79363


 79%|███████▉  | 1583/2000 [36:37<07:05,  1.02s/it]

Training_loss 18.76002


 79%|███████▉  | 1584/2000 [36:38<06:57,  1.00s/it]

Training_loss 18.79268


 79%|███████▉  | 1585/2000 [36:39<06:51,  1.01it/s]

Training_loss 18.81485


 79%|███████▉  | 1586/2000 [36:40<06:57,  1.01s/it]

Training_loss 18.86795


 79%|███████▉  | 1587/2000 [36:42<07:10,  1.04s/it]

Training_loss 18.86334


 79%|███████▉  | 1588/2000 [36:43<07:32,  1.10s/it]

Training_loss 18.88057


 79%|███████▉  | 1589/2000 [36:44<07:25,  1.08s/it]

Training_loss 18.85986


 80%|███████▉  | 1590/2000 [36:45<07:38,  1.12s/it]

Training_loss 18.83309


 80%|███████▉  | 1591/2000 [36:46<07:37,  1.12s/it]

Training_loss 18.82924


 80%|███████▉  | 1592/2000 [36:47<07:24,  1.09s/it]

Training_loss 18.84259


 80%|███████▉  | 1593/2000 [36:48<07:17,  1.08s/it]

Training_loss 18.90209


 80%|███████▉  | 1594/2000 [36:49<07:10,  1.06s/it]

Training_loss 18.88807


 80%|███████▉  | 1595/2000 [36:50<07:26,  1.10s/it]

Training_loss 18.90656


 80%|███████▉  | 1596/2000 [36:51<07:21,  1.09s/it]

Training_loss 18.87803


 80%|███████▉  | 1597/2000 [36:52<07:02,  1.05s/it]

Training_loss 18.89292


 80%|███████▉  | 1598/2000 [36:53<06:57,  1.04s/it]

Training_loss 18.85547


 80%|███████▉  | 1599/2000 [36:55<07:03,  1.06s/it]

Training_loss 18.86284


 80%|████████  | 1600/2000 [36:56<07:02,  1.06s/it]

Training_loss 18.83067


 80%|████████  | 1601/2000 [36:57<06:54,  1.04s/it]

Training_loss 18.78945


 80%|████████  | 1602/2000 [36:58<06:49,  1.03s/it]

Training_loss 18.75612


 80%|████████  | 1603/2000 [36:59<07:11,  1.09s/it]

Training_loss 18.75227


 80%|████████  | 1604/2000 [37:00<07:00,  1.06s/it]

Training_loss 18.74472


 80%|████████  | 1605/2000 [37:01<06:48,  1.03s/it]

Training_loss 18.73765


 80%|████████  | 1606/2000 [37:02<06:47,  1.04s/it]

Training_loss 18.73282


 80%|████████  | 1607/2000 [37:03<06:44,  1.03s/it]

Training_loss 18.74910


 80%|████████  | 1608/2000 [37:04<06:40,  1.02s/it]

Training_loss 18.74790


 80%|████████  | 1609/2000 [37:05<06:39,  1.02s/it]

Training_loss 18.74474


 80%|████████  | 1610/2000 [37:06<07:06,  1.09s/it]

Training_loss 18.77119


 81%|████████  | 1611/2000 [37:07<07:04,  1.09s/it]

Training_loss 18.76987


 81%|████████  | 1612/2000 [37:08<06:45,  1.05s/it]

Training_loss 18.73623


 81%|████████  | 1613/2000 [37:09<06:35,  1.02s/it]

Training_loss 18.75108


 81%|████████  | 1614/2000 [37:10<06:58,  1.08s/it]

Training_loss 18.73617


 81%|████████  | 1615/2000 [37:11<06:45,  1.05s/it]

Training_loss 18.73660


 81%|████████  | 1616/2000 [37:12<06:31,  1.02s/it]

Training_loss 18.74051


 81%|████████  | 1617/2000 [37:13<06:32,  1.02s/it]

Training_loss 18.75710


 81%|████████  | 1618/2000 [37:14<06:33,  1.03s/it]

Training_loss 18.75108


 81%|████████  | 1619/2000 [37:15<06:28,  1.02s/it]

Training_loss 18.78546


 81%|████████  | 1620/2000 [37:16<06:26,  1.02s/it]

Training_loss 18.79430


 81%|████████  | 1621/2000 [37:17<06:19,  1.00s/it]

Training_loss 18.80273


 81%|████████  | 1622/2000 [37:18<06:12,  1.01it/s]

Training_loss 18.80945


 81%|████████  | 1623/2000 [37:19<06:16,  1.00it/s]

Training_loss 18.84715


 81%|████████  | 1624/2000 [37:20<06:17,  1.00s/it]

Training_loss 18.84927


 81%|████████▏ | 1625/2000 [37:21<06:12,  1.01it/s]

Training_loss 18.79179


 81%|████████▏ | 1626/2000 [37:22<06:33,  1.05s/it]

Training_loss 18.81454


 81%|████████▏ | 1627/2000 [37:24<06:35,  1.06s/it]

Training_loss 18.82421


 81%|████████▏ | 1628/2000 [37:24<06:17,  1.02s/it]

Training_loss 18.84410


 81%|████████▏ | 1629/2000 [37:25<06:12,  1.00s/it]

Training_loss 18.79429


 82%|████████▏ | 1630/2000 [37:26<06:07,  1.01it/s]

Training_loss 18.84699


 82%|████████▏ | 1631/2000 [37:27<06:02,  1.02it/s]

Training_loss 18.83340


 82%|████████▏ | 1632/2000 [37:28<06:05,  1.01it/s]

Training_loss 18.79576


 82%|████████▏ | 1633/2000 [37:29<06:04,  1.01it/s]

Training_loss 18.79874


 82%|████████▏ | 1634/2000 [37:30<05:53,  1.04it/s]

Training_loss 18.83319


 82%|████████▏ | 1635/2000 [37:31<05:58,  1.02it/s]

Training_loss 18.87481


 82%|████████▏ | 1636/2000 [37:32<05:59,  1.01it/s]

Training_loss 18.88227


 82%|████████▏ | 1637/2000 [37:33<06:10,  1.02s/it]

Training_loss 18.90692


 82%|████████▏ | 1638/2000 [37:34<05:57,  1.01it/s]

Training_loss 18.92013


 82%|████████▏ | 1639/2000 [37:35<05:48,  1.04it/s]

Training_loss 18.92785


 82%|████████▏ | 1640/2000 [37:36<05:47,  1.04it/s]

Training_loss 18.92617


 82%|████████▏ | 1641/2000 [37:37<05:48,  1.03it/s]

Training_loss 18.96656


 82%|████████▏ | 1642/2000 [37:38<06:17,  1.05s/it]

Training_loss 18.94228


 82%|████████▏ | 1643/2000 [37:39<06:16,  1.05s/it]

Training_loss 18.97173


 82%|████████▏ | 1644/2000 [37:41<06:10,  1.04s/it]

Training_loss 18.94707


 82%|████████▏ | 1645/2000 [37:41<05:58,  1.01s/it]

Training_loss 18.91716


 82%|████████▏ | 1646/2000 [37:42<05:54,  1.00s/it]

Training_loss 18.87841


 82%|████████▏ | 1647/2000 [37:43<05:43,  1.03it/s]

Training_loss 18.92130


 82%|████████▏ | 1648/2000 [37:44<05:36,  1.04it/s]

Training_loss 18.92262


 82%|████████▏ | 1649/2000 [37:45<05:38,  1.04it/s]

Training_loss 18.90767


 82%|████████▎ | 1650/2000 [37:46<05:44,  1.01it/s]

Training_loss 18.91100


 83%|████████▎ | 1651/2000 [37:47<05:38,  1.03it/s]

Training_loss 18.93556


 83%|████████▎ | 1652/2000 [37:48<05:31,  1.05it/s]

Training_loss 18.94368


 83%|████████▎ | 1653/2000 [37:49<05:36,  1.03it/s]

Training_loss 18.96396


 83%|████████▎ | 1654/2000 [37:50<05:35,  1.03it/s]

Training_loss 18.93020


 83%|████████▎ | 1655/2000 [37:51<05:30,  1.04it/s]

Training_loss 18.90503


 83%|████████▎ | 1656/2000 [37:52<05:35,  1.02it/s]

Training_loss 18.92641


 83%|████████▎ | 1657/2000 [37:53<05:49,  1.02s/it]

Training_loss 18.95508


 83%|████████▎ | 1658/2000 [37:54<06:16,  1.10s/it]

Training_loss 18.93678


 83%|████████▎ | 1659/2000 [37:55<06:02,  1.06s/it]

Training_loss 18.94074


 83%|████████▎ | 1660/2000 [37:57<06:06,  1.08s/it]

Training_loss 18.95928


 83%|████████▎ | 1661/2000 [37:58<06:01,  1.07s/it]

Training_loss 18.95984


 83%|████████▎ | 1662/2000 [37:59<05:59,  1.06s/it]

Training_loss 18.99024


 83%|████████▎ | 1663/2000 [38:00<06:07,  1.09s/it]

Training_loss 19.01580


 83%|████████▎ | 1664/2000 [38:01<06:06,  1.09s/it]

Training_loss 19.02473


 83%|████████▎ | 1665/2000 [38:02<05:58,  1.07s/it]

Training_loss 19.04466


 83%|████████▎ | 1666/2000 [38:03<05:43,  1.03s/it]

Training_loss 19.03249


 83%|████████▎ | 1667/2000 [38:04<05:41,  1.03s/it]

Training_loss 19.05200


 83%|████████▎ | 1668/2000 [38:05<05:34,  1.01s/it]

Training_loss 19.07076


 83%|████████▎ | 1669/2000 [38:06<05:42,  1.03s/it]

Training_loss 19.04756


 84%|████████▎ | 1670/2000 [38:07<05:46,  1.05s/it]

Training_loss 19.09136


 84%|████████▎ | 1671/2000 [38:08<05:43,  1.05s/it]

Training_loss 19.12304


 84%|████████▎ | 1672/2000 [38:09<05:31,  1.01s/it]

Training_loss 19.08634


 84%|████████▎ | 1673/2000 [38:10<05:58,  1.10s/it]

Training_loss 19.09320


 84%|████████▎ | 1674/2000 [38:11<05:51,  1.08s/it]

Training_loss 19.10715


 84%|████████▍ | 1675/2000 [38:12<05:46,  1.07s/it]

Training_loss 19.10263


 84%|████████▍ | 1676/2000 [38:14<06:00,  1.11s/it]

Training_loss 19.08529


 84%|████████▍ | 1677/2000 [38:15<05:51,  1.09s/it]

Training_loss 19.12677


 84%|████████▍ | 1678/2000 [38:16<05:39,  1.05s/it]

Training_loss 19.10565


 84%|████████▍ | 1679/2000 [38:16<05:22,  1.00s/it]

Training_loss 19.13145


 84%|████████▍ | 1680/2000 [38:17<05:24,  1.01s/it]

Training_loss 19.15726


 84%|████████▍ | 1681/2000 [38:19<05:29,  1.03s/it]

Training_loss 19.20397


 84%|████████▍ | 1682/2000 [38:20<05:26,  1.03s/it]

Training_loss 19.21784


 84%|████████▍ | 1683/2000 [38:21<05:32,  1.05s/it]

Training_loss 19.24226


 84%|████████▍ | 1684/2000 [38:22<05:26,  1.03s/it]

Training_loss 19.22924


 84%|████████▍ | 1685/2000 [38:23<05:15,  1.00s/it]

Training_loss 19.23390


 84%|████████▍ | 1686/2000 [38:24<05:17,  1.01s/it]

Training_loss 19.19269


 84%|████████▍ | 1687/2000 [38:25<05:10,  1.01it/s]

Training_loss 19.15709


 84%|████████▍ | 1688/2000 [38:26<05:21,  1.03s/it]

Training_loss 19.16890


 84%|████████▍ | 1689/2000 [38:27<05:29,  1.06s/it]

Training_loss 19.17762


 84%|████████▍ | 1690/2000 [38:28<05:34,  1.08s/it]

Training_loss 19.15032


 85%|████████▍ | 1691/2000 [38:29<05:17,  1.03s/it]

Training_loss 19.17914


 85%|████████▍ | 1692/2000 [38:30<05:11,  1.01s/it]

Training_loss 19.20745


 85%|████████▍ | 1693/2000 [38:31<05:08,  1.01s/it]

Training_loss 19.20711


 85%|████████▍ | 1694/2000 [38:32<05:41,  1.12s/it]

Training_loss 19.21309


 85%|████████▍ | 1695/2000 [38:34<06:09,  1.21s/it]

Training_loss 19.23348


 85%|████████▍ | 1696/2000 [38:35<06:00,  1.18s/it]

Training_loss 19.16964


 85%|████████▍ | 1697/2000 [38:36<05:51,  1.16s/it]

Training_loss 19.19988


 85%|████████▍ | 1698/2000 [38:37<05:37,  1.12s/it]

Training_loss 19.18092


 85%|████████▍ | 1699/2000 [38:38<05:21,  1.07s/it]

Training_loss 19.20510


 85%|████████▌ | 1700/2000 [38:39<05:14,  1.05s/it]

Training_loss 19.24144


 85%|████████▌ | 1701/2000 [38:40<05:09,  1.04s/it]

Training_loss 19.23660


 85%|████████▌ | 1702/2000 [38:41<05:00,  1.01s/it]

Training_loss 19.19840


 85%|████████▌ | 1703/2000 [38:42<05:13,  1.06s/it]

Training_loss 19.19282


 85%|████████▌ | 1704/2000 [38:43<05:20,  1.08s/it]

Training_loss 19.25177


 85%|████████▌ | 1705/2000 [38:44<05:06,  1.04s/it]

Training_loss 19.27016


 85%|████████▌ | 1706/2000 [38:45<05:02,  1.03s/it]

Training_loss 19.24804


 85%|████████▌ | 1707/2000 [38:46<05:00,  1.03s/it]

Training_loss 19.19635


 85%|████████▌ | 1708/2000 [38:47<04:53,  1.01s/it]

Training_loss 19.20463


 85%|████████▌ | 1709/2000 [38:48<04:49,  1.00it/s]

Training_loss 19.20864


 86%|████████▌ | 1710/2000 [38:49<04:50,  1.00s/it]

Training_loss 19.23070


 86%|████████▌ | 1711/2000 [38:50<04:45,  1.01it/s]

Training_loss 19.28932


 86%|████████▌ | 1712/2000 [38:51<04:38,  1.04it/s]

Training_loss 19.31289


 86%|████████▌ | 1713/2000 [38:52<04:33,  1.05it/s]

Training_loss 19.33563


 86%|████████▌ | 1714/2000 [38:53<04:35,  1.04it/s]

Training_loss 19.28228


 86%|████████▌ | 1715/2000 [38:54<04:30,  1.05it/s]

Training_loss 19.24219


 86%|████████▌ | 1716/2000 [38:55<04:34,  1.04it/s]

Training_loss 19.18775


 86%|████████▌ | 1717/2000 [38:56<04:37,  1.02it/s]

Training_loss 19.16336


 86%|████████▌ | 1718/2000 [38:57<04:27,  1.05it/s]

Training_loss 19.17550


 86%|████████▌ | 1719/2000 [38:58<04:43,  1.01s/it]

Training_loss 19.19194


 86%|████████▌ | 1720/2000 [38:59<04:58,  1.07s/it]

Training_loss 19.14997


 86%|████████▌ | 1721/2000 [39:00<04:57,  1.07s/it]

Training_loss 19.16189


 86%|████████▌ | 1722/2000 [39:01<04:49,  1.04s/it]

Training_loss 19.15946


 86%|████████▌ | 1723/2000 [39:02<04:46,  1.03s/it]

Training_loss 19.16961


 86%|████████▌ | 1724/2000 [39:03<04:51,  1.06s/it]

Training_loss 19.14525


 86%|████████▋ | 1725/2000 [39:04<04:49,  1.05s/it]

Training_loss 19.17142


 86%|████████▋ | 1726/2000 [39:05<04:45,  1.04s/it]

Training_loss 19.15600


 86%|████████▋ | 1727/2000 [39:06<04:50,  1.06s/it]

Training_loss 19.19836


 86%|████████▋ | 1728/2000 [39:07<04:44,  1.05s/it]

Training_loss 19.20340


 86%|████████▋ | 1729/2000 [39:08<04:45,  1.05s/it]

Training_loss 19.21672


 86%|████████▋ | 1730/2000 [39:09<04:49,  1.07s/it]

Training_loss 19.18019


 87%|████████▋ | 1731/2000 [39:11<06:01,  1.34s/it]

Training_loss 19.19561


 87%|████████▋ | 1732/2000 [39:14<08:10,  1.83s/it]

Training_loss 19.20216


 87%|████████▋ | 1733/2000 [39:16<08:23,  1.89s/it]

Training_loss 19.16831


 87%|████████▋ | 1734/2000 [39:18<07:28,  1.68s/it]

Training_loss 19.15878


 87%|████████▋ | 1735/2000 [39:19<06:31,  1.48s/it]

Training_loss 19.14115


 87%|████████▋ | 1736/2000 [39:20<06:03,  1.38s/it]

Training_loss 19.18631


 87%|████████▋ | 1737/2000 [39:21<06:20,  1.45s/it]

Training_loss 19.17711


 87%|████████▋ | 1738/2000 [39:22<05:43,  1.31s/it]

Training_loss 19.19769


 87%|████████▋ | 1739/2000 [39:23<05:14,  1.20s/it]

Training_loss 19.21708


 87%|████████▋ | 1740/2000 [39:24<04:51,  1.12s/it]

Training_loss 19.21106


 87%|████████▋ | 1741/2000 [39:25<04:42,  1.09s/it]

Training_loss 19.22512


 87%|████████▋ | 1742/2000 [39:26<04:29,  1.04s/it]

Training_loss 19.21399


 87%|████████▋ | 1743/2000 [39:27<04:33,  1.06s/it]

Training_loss 19.20022


 87%|████████▋ | 1744/2000 [39:28<04:23,  1.03s/it]

Training_loss 19.22341


 87%|████████▋ | 1745/2000 [39:29<04:17,  1.01s/it]

Training_loss 19.21750


 87%|████████▋ | 1746/2000 [39:30<04:31,  1.07s/it]

Training_loss 19.18536


 87%|████████▋ | 1747/2000 [39:31<04:25,  1.05s/it]

Training_loss 19.17602


 87%|████████▋ | 1748/2000 [39:32<04:19,  1.03s/it]

Training_loss 19.19272


 87%|████████▋ | 1749/2000 [39:33<04:11,  1.00s/it]

Training_loss 19.19665


 88%|████████▊ | 1750/2000 [39:34<04:01,  1.03it/s]

Training_loss 19.20298


 88%|████████▊ | 1751/2000 [39:35<04:00,  1.03it/s]

Training_loss 19.21554


 88%|████████▊ | 1752/2000 [39:36<04:03,  1.02it/s]

Training_loss 19.23489


 88%|████████▊ | 1753/2000 [39:37<03:59,  1.03it/s]

Training_loss 19.22905


 88%|████████▊ | 1754/2000 [39:38<04:01,  1.02it/s]

Training_loss 19.25407


 88%|████████▊ | 1755/2000 [39:39<03:55,  1.04it/s]

Training_loss 19.27855


 88%|████████▊ | 1756/2000 [39:40<03:57,  1.03it/s]

Training_loss 19.23419


 88%|████████▊ | 1757/2000 [39:41<04:01,  1.01it/s]

Training_loss 19.26168


 88%|████████▊ | 1758/2000 [39:42<03:53,  1.04it/s]

Training_loss 19.26647


 88%|████████▊ | 1759/2000 [39:43<03:54,  1.03it/s]

Training_loss 19.29077


 88%|████████▊ | 1760/2000 [39:44<03:50,  1.04it/s]

Training_loss 19.29676


 88%|████████▊ | 1761/2000 [39:45<03:47,  1.05it/s]

Training_loss 19.25671


 88%|████████▊ | 1762/2000 [39:46<04:12,  1.06s/it]

Training_loss 19.30361


 88%|████████▊ | 1763/2000 [39:47<04:19,  1.09s/it]

Training_loss 19.31390


 88%|████████▊ | 1764/2000 [39:49<04:42,  1.20s/it]

Training_loss 19.36261


 88%|████████▊ | 1765/2000 [39:50<04:45,  1.22s/it]

Training_loss 19.33248


 88%|████████▊ | 1766/2000 [39:51<04:40,  1.20s/it]

Training_loss 19.27319


 88%|████████▊ | 1767/2000 [39:52<04:40,  1.20s/it]

Training_loss 19.27171


 88%|████████▊ | 1768/2000 [39:54<04:44,  1.23s/it]

Training_loss 19.28349


 88%|████████▊ | 1769/2000 [39:56<05:47,  1.50s/it]

Training_loss 19.26880


 88%|████████▊ | 1770/2000 [39:57<05:44,  1.50s/it]

Training_loss 19.33046


 89%|████████▊ | 1771/2000 [39:59<05:33,  1.45s/it]

Training_loss 19.33389


 89%|████████▊ | 1772/2000 [40:00<05:27,  1.44s/it]

Training_loss 19.34621


 89%|████████▊ | 1773/2000 [40:02<05:36,  1.48s/it]

Training_loss 19.43128


 89%|████████▊ | 1774/2000 [40:03<05:26,  1.44s/it]

Training_loss 19.42522


 89%|████████▉ | 1775/2000 [40:04<05:04,  1.35s/it]

Training_loss 19.42689


 89%|████████▉ | 1776/2000 [40:05<04:53,  1.31s/it]

Training_loss 19.40625


 89%|████████▉ | 1777/2000 [40:07<04:39,  1.25s/it]

Training_loss 19.42910


 89%|████████▉ | 1778/2000 [40:08<04:29,  1.21s/it]

Training_loss 19.44271


 89%|████████▉ | 1779/2000 [40:09<04:21,  1.18s/it]

Training_loss 19.46190


 89%|████████▉ | 1780/2000 [40:10<04:13,  1.15s/it]

Training_loss 19.48948


 89%|████████▉ | 1781/2000 [40:11<04:04,  1.12s/it]

Training_loss 19.50746


 89%|████████▉ | 1782/2000 [40:12<04:00,  1.10s/it]

Training_loss 19.53182


 89%|████████▉ | 1783/2000 [40:13<03:55,  1.08s/it]

Training_loss 19.54347


 89%|████████▉ | 1784/2000 [40:14<04:00,  1.11s/it]

Training_loss 19.53571


 89%|████████▉ | 1785/2000 [40:15<03:57,  1.10s/it]

Training_loss 19.55893


 89%|████████▉ | 1786/2000 [40:16<04:00,  1.12s/it]

Training_loss 19.54655


 89%|████████▉ | 1787/2000 [40:18<04:06,  1.16s/it]

Training_loss 19.56710


 89%|████████▉ | 1788/2000 [40:19<04:11,  1.19s/it]

Training_loss 19.56748


 89%|████████▉ | 1789/2000 [40:20<04:03,  1.16s/it]

Training_loss 19.58683


 90%|████████▉ | 1790/2000 [40:21<04:00,  1.15s/it]

Training_loss 19.61351


 90%|████████▉ | 1791/2000 [40:22<04:01,  1.16s/it]

Training_loss 19.59176


 90%|████████▉ | 1792/2000 [40:23<03:59,  1.15s/it]

Training_loss 19.62872


 90%|████████▉ | 1793/2000 [40:25<03:55,  1.14s/it]

Training_loss 19.65655


 90%|████████▉ | 1794/2000 [40:26<04:15,  1.24s/it]

Training_loss 19.64965


 90%|████████▉ | 1795/2000 [40:27<03:57,  1.16s/it]

Training_loss 19.64741


 90%|████████▉ | 1796/2000 [40:28<03:51,  1.14s/it]

Training_loss 19.61972


 90%|████████▉ | 1797/2000 [40:29<03:50,  1.13s/it]

Training_loss 19.59413


 90%|████████▉ | 1798/2000 [40:30<03:45,  1.12s/it]

Training_loss 19.59947


 90%|████████▉ | 1799/2000 [40:31<03:39,  1.09s/it]

Training_loss 19.59152


 90%|█████████ | 1800/2000 [40:33<03:42,  1.11s/it]

Training_loss 19.58880


 90%|█████████ | 1801/2000 [40:34<03:54,  1.18s/it]

Training_loss 19.62174


 90%|█████████ | 1802/2000 [40:35<04:03,  1.23s/it]

Training_loss 19.62964


 90%|█████████ | 1803/2000 [40:36<03:52,  1.18s/it]

Training_loss 19.65721


 90%|█████████ | 1804/2000 [40:37<03:48,  1.17s/it]

Training_loss 19.64225


 90%|█████████ | 1805/2000 [40:38<03:39,  1.13s/it]

Training_loss 19.70128


 90%|█████████ | 1806/2000 [40:39<03:29,  1.08s/it]

Training_loss 19.75257


 90%|█████████ | 1807/2000 [40:41<03:30,  1.09s/it]

Training_loss 19.72605


 90%|█████████ | 1808/2000 [40:42<03:29,  1.09s/it]

Training_loss 19.74957


 90%|█████████ | 1809/2000 [40:43<03:23,  1.07s/it]

Training_loss 19.73200


 90%|█████████ | 1810/2000 [40:44<03:13,  1.02s/it]

Training_loss 19.73124


 91%|█████████ | 1811/2000 [40:45<03:15,  1.03s/it]

Training_loss 19.68142


 91%|█████████ | 1812/2000 [40:46<03:16,  1.05s/it]

Training_loss 19.69743


 91%|█████████ | 1813/2000 [40:47<03:14,  1.04s/it]

Training_loss 19.63556


 91%|█████████ | 1814/2000 [40:48<03:11,  1.03s/it]

Training_loss 19.66614


 91%|█████████ | 1815/2000 [40:49<03:13,  1.05s/it]

Training_loss 19.60443


 91%|█████████ | 1816/2000 [40:51<03:57,  1.29s/it]

Training_loss 19.62571


 91%|█████████ | 1817/2000 [40:53<05:08,  1.68s/it]

Training_loss 19.58767


 91%|█████████ | 1818/2000 [40:55<05:07,  1.69s/it]

Training_loss 19.53909


 91%|█████████ | 1819/2000 [40:56<04:45,  1.58s/it]

Training_loss 19.58136


 91%|█████████ | 1820/2000 [40:58<04:50,  1.61s/it]

Training_loss 19.59376


 91%|█████████ | 1821/2000 [40:59<04:43,  1.58s/it]

Training_loss 19.59864


 91%|█████████ | 1822/2000 [41:01<04:26,  1.50s/it]

Training_loss 19.57879


 91%|█████████ | 1823/2000 [41:02<04:09,  1.41s/it]

Training_loss 19.60888


 91%|█████████ | 1824/2000 [41:03<03:55,  1.34s/it]

Training_loss 19.64084


 91%|█████████▏| 1825/2000 [41:04<03:43,  1.27s/it]

Training_loss 19.66357


 91%|█████████▏| 1826/2000 [41:05<03:35,  1.24s/it]

Training_loss 19.67611


 91%|█████████▏| 1827/2000 [41:07<03:45,  1.30s/it]

Training_loss 19.63369


 91%|█████████▏| 1828/2000 [41:08<03:33,  1.24s/it]

Training_loss 19.66368


 91%|█████████▏| 1829/2000 [41:09<03:39,  1.29s/it]

Training_loss 19.65249


 92%|█████████▏| 1830/2000 [41:11<03:33,  1.26s/it]

Training_loss 19.69084


 92%|█████████▏| 1831/2000 [41:12<03:25,  1.22s/it]

Training_loss 19.64323


 92%|█████████▏| 1832/2000 [41:13<03:14,  1.15s/it]

Training_loss 19.64312


 92%|█████████▏| 1833/2000 [41:14<03:04,  1.10s/it]

Training_loss 19.57267


 92%|█████████▏| 1834/2000 [41:15<02:57,  1.07s/it]

Training_loss 19.61607


 92%|█████████▏| 1835/2000 [41:16<02:50,  1.03s/it]

Training_loss 19.61695


 92%|█████████▏| 1836/2000 [41:17<02:52,  1.05s/it]

Training_loss 19.66304


 92%|█████████▏| 1837/2000 [41:18<02:58,  1.10s/it]

Training_loss 19.66600


 92%|█████████▏| 1838/2000 [41:20<03:36,  1.34s/it]

Training_loss 19.63687


 92%|█████████▏| 1839/2000 [41:21<03:40,  1.37s/it]

Training_loss 19.58164


 92%|█████████▏| 1840/2000 [41:24<04:22,  1.64s/it]

Training_loss 19.63383


 92%|█████████▏| 1841/2000 [41:26<05:14,  1.98s/it]

Training_loss 19.62045


 92%|█████████▏| 1842/2000 [41:28<04:53,  1.86s/it]

Training_loss 19.62735


 92%|█████████▏| 1843/2000 [41:29<04:20,  1.66s/it]

Training_loss 19.67282


 92%|█████████▏| 1844/2000 [41:31<04:13,  1.62s/it]

Training_loss 19.68264


 92%|█████████▏| 1845/2000 [41:32<03:57,  1.53s/it]

Training_loss 19.70540


 92%|█████████▏| 1846/2000 [41:33<03:40,  1.43s/it]

Training_loss 19.71601


 92%|█████████▏| 1847/2000 [41:34<03:25,  1.35s/it]

Training_loss 19.66575


 92%|█████████▏| 1848/2000 [41:35<03:16,  1.29s/it]

Training_loss 19.64515


 92%|█████████▏| 1849/2000 [41:37<03:13,  1.28s/it]

Training_loss 19.65309


 92%|█████████▎| 1850/2000 [41:38<03:19,  1.33s/it]

Training_loss 19.65901


 93%|█████████▎| 1851/2000 [41:39<03:18,  1.33s/it]

Training_loss 19.65757


 93%|█████████▎| 1852/2000 [41:41<03:19,  1.35s/it]

Training_loss 19.64286


 93%|█████████▎| 1853/2000 [41:43<03:38,  1.49s/it]

Training_loss 19.62599


 93%|█████████▎| 1854/2000 [41:44<03:48,  1.57s/it]

Training_loss 19.56873


 93%|█████████▎| 1855/2000 [41:46<03:38,  1.51s/it]

Training_loss 19.59102


 93%|█████████▎| 1856/2000 [41:47<03:26,  1.43s/it]

Training_loss 19.58806


 93%|█████████▎| 1857/2000 [41:48<03:14,  1.36s/it]

Training_loss 19.58994


 93%|█████████▎| 1858/2000 [41:49<03:08,  1.32s/it]

Training_loss 19.58118


 93%|█████████▎| 1859/2000 [41:51<03:19,  1.42s/it]

Training_loss 19.60413


 93%|█████████▎| 1860/2000 [41:52<03:11,  1.37s/it]

Training_loss 19.66083


 93%|█████████▎| 1861/2000 [41:54<03:04,  1.33s/it]

Training_loss 19.67281


 93%|█████████▎| 1862/2000 [41:55<03:11,  1.39s/it]

Training_loss 19.63784


 93%|█████████▎| 1863/2000 [41:57<03:11,  1.40s/it]

Training_loss 19.61738


 93%|█████████▎| 1864/2000 [41:58<03:13,  1.42s/it]

Training_loss 19.60430


 93%|█████████▎| 1865/2000 [42:00<03:18,  1.47s/it]

Training_loss 19.61878


 93%|█████████▎| 1866/2000 [42:01<03:04,  1.38s/it]

Training_loss 19.65469


 93%|█████████▎| 1867/2000 [42:02<02:53,  1.30s/it]

Training_loss 19.64778


 93%|█████████▎| 1868/2000 [42:03<02:43,  1.24s/it]

Training_loss 19.56726


 93%|█████████▎| 1869/2000 [42:04<02:42,  1.24s/it]

Training_loss 19.61507


 94%|█████████▎| 1870/2000 [42:05<02:41,  1.24s/it]

Training_loss 19.62678


 94%|█████████▎| 1871/2000 [42:07<02:50,  1.32s/it]

Training_loss 19.68542


 94%|█████████▎| 1872/2000 [42:08<02:52,  1.35s/it]

Training_loss 19.68007


 94%|█████████▎| 1873/2000 [42:10<03:11,  1.51s/it]

Training_loss 19.66889


 94%|█████████▎| 1874/2000 [42:12<03:23,  1.62s/it]

Training_loss 19.68622


 94%|█████████▍| 1875/2000 [42:14<03:15,  1.56s/it]

Training_loss 19.69474


 94%|█████████▍| 1876/2000 [42:15<03:15,  1.57s/it]

Training_loss 19.67459


 94%|█████████▍| 1877/2000 [42:16<03:04,  1.50s/it]

Training_loss 19.66793


 94%|█████████▍| 1878/2000 [42:18<02:49,  1.39s/it]

Training_loss 19.69034


 94%|█████████▍| 1879/2000 [42:19<02:40,  1.33s/it]

Training_loss 19.69236


 94%|█████████▍| 1880/2000 [42:20<02:40,  1.34s/it]

Training_loss 19.68618


 94%|█████████▍| 1881/2000 [42:22<02:41,  1.35s/it]

Training_loss 19.65228


 94%|█████████▍| 1882/2000 [42:23<02:41,  1.37s/it]

Training_loss 19.67152


 94%|█████████▍| 1883/2000 [42:25<02:45,  1.42s/it]

Training_loss 19.67664


 94%|█████████▍| 1884/2000 [42:26<02:55,  1.51s/it]

Training_loss 19.68617


 94%|█████████▍| 1885/2000 [42:28<03:06,  1.62s/it]

Training_loss 19.65062


 94%|█████████▍| 1886/2000 [42:30<03:00,  1.59s/it]

Training_loss 19.62631


 94%|█████████▍| 1887/2000 [42:31<03:01,  1.61s/it]

Training_loss 19.65441


 94%|█████████▍| 1888/2000 [42:33<03:03,  1.64s/it]

Training_loss 19.72709


 94%|█████████▍| 1889/2000 [42:35<03:25,  1.85s/it]

Training_loss 19.79384


 94%|█████████▍| 1890/2000 [42:37<03:03,  1.67s/it]

Training_loss 19.77068


 95%|█████████▍| 1891/2000 [42:38<02:54,  1.60s/it]

Training_loss 19.73426


 95%|█████████▍| 1892/2000 [42:39<02:36,  1.45s/it]

Training_loss 19.73769


 95%|█████████▍| 1893/2000 [42:40<02:20,  1.31s/it]

Training_loss 19.75175


 95%|█████████▍| 1894/2000 [42:42<02:23,  1.35s/it]

Training_loss 19.79033


 95%|█████████▍| 1895/2000 [42:43<02:35,  1.48s/it]

Training_loss 19.78183


 95%|█████████▍| 1896/2000 [42:45<02:28,  1.43s/it]

Training_loss 19.77183


 95%|█████████▍| 1897/2000 [42:46<02:25,  1.41s/it]

Training_loss 19.74990


 95%|█████████▍| 1898/2000 [42:47<02:20,  1.38s/it]

Training_loss 19.75927


 95%|█████████▍| 1899/2000 [42:48<02:08,  1.27s/it]

Training_loss 19.77981


 95%|█████████▌| 1900/2000 [42:49<01:52,  1.13s/it]

Training_loss 19.84050


 95%|█████████▌| 1901/2000 [42:50<01:42,  1.04s/it]

Training_loss 19.80118


 95%|█████████▌| 1902/2000 [42:51<01:34,  1.03it/s]

Training_loss 19.81645


 95%|█████████▌| 1903/2000 [42:52<01:28,  1.10it/s]

Training_loss 19.79198


 95%|█████████▌| 1904/2000 [42:53<01:32,  1.03it/s]

Training_loss 19.76861


 95%|█████████▌| 1905/2000 [42:54<01:52,  1.19s/it]

Training_loss 19.79864


 95%|█████████▌| 1906/2000 [42:55<01:48,  1.15s/it]

Training_loss 19.77303


 95%|█████████▌| 1907/2000 [42:57<01:50,  1.18s/it]

Training_loss 19.74262


 95%|█████████▌| 1908/2000 [42:59<02:29,  1.62s/it]

Training_loss 19.73309


 95%|█████████▌| 1909/2000 [43:01<02:22,  1.57s/it]

Training_loss 19.73278


 96%|█████████▌| 1910/2000 [43:02<02:07,  1.41s/it]

Training_loss 19.72806


 96%|█████████▌| 1911/2000 [43:03<02:11,  1.48s/it]

Training_loss 19.72924


 96%|█████████▌| 1912/2000 [43:05<02:07,  1.45s/it]

Training_loss 19.74705


 96%|█████████▌| 1913/2000 [43:07<02:36,  1.80s/it]

Training_loss 19.74745


 96%|█████████▌| 1914/2000 [43:08<02:15,  1.57s/it]

Training_loss 19.73709


 96%|█████████▌| 1915/2000 [43:10<02:18,  1.62s/it]

Training_loss 19.69403


 96%|█████████▌| 1916/2000 [43:11<02:05,  1.50s/it]

Training_loss 19.70116


 96%|█████████▌| 1917/2000 [43:13<01:58,  1.42s/it]

Training_loss 19.72983


 96%|█████████▌| 1918/2000 [43:14<02:02,  1.49s/it]

Training_loss 19.73179


 96%|█████████▌| 1919/2000 [43:16<01:58,  1.46s/it]

Training_loss 19.73760


 96%|█████████▌| 1920/2000 [43:17<01:54,  1.43s/it]

Training_loss 19.76961


 96%|█████████▌| 1921/2000 [43:18<01:50,  1.40s/it]

Training_loss 19.80334


 96%|█████████▌| 1922/2000 [43:20<01:45,  1.35s/it]

Training_loss 19.83310


 96%|█████████▌| 1923/2000 [43:21<01:36,  1.26s/it]

Training_loss 19.83999


 96%|█████████▌| 1924/2000 [43:22<01:31,  1.20s/it]

Training_loss 19.82451


 96%|█████████▋| 1925/2000 [43:23<01:28,  1.18s/it]

Training_loss 19.80090


 96%|█████████▋| 1926/2000 [43:24<01:23,  1.13s/it]

Training_loss 19.76291


 96%|█████████▋| 1927/2000 [43:25<01:21,  1.12s/it]

Training_loss 19.72875


 96%|█████████▋| 1928/2000 [43:26<01:20,  1.12s/it]

Training_loss 19.76080


 96%|█████████▋| 1929/2000 [43:27<01:16,  1.08s/it]

Training_loss 19.79891


 96%|█████████▋| 1930/2000 [43:28<01:14,  1.07s/it]

Training_loss 19.79916


 97%|█████████▋| 1931/2000 [43:29<01:11,  1.03s/it]

Training_loss 19.85986


 97%|█████████▋| 1932/2000 [43:30<01:14,  1.10s/it]

Training_loss 19.87903


 97%|█████████▋| 1933/2000 [43:31<01:13,  1.09s/it]

Training_loss 19.87253


 97%|█████████▋| 1934/2000 [43:32<01:11,  1.08s/it]

Training_loss 19.88519


 97%|█████████▋| 1935/2000 [43:34<01:11,  1.09s/it]

Training_loss 19.84823


 97%|█████████▋| 1936/2000 [43:35<01:06,  1.05s/it]

Training_loss 19.85592


 97%|█████████▋| 1937/2000 [43:35<01:04,  1.02s/it]

Training_loss 19.91028


 97%|█████████▋| 1938/2000 [43:36<01:02,  1.01s/it]

Training_loss 19.86678


 97%|█████████▋| 1939/2000 [43:37<01:01,  1.00s/it]

Training_loss 19.89749


 97%|█████████▋| 1940/2000 [43:38<00:59,  1.01it/s]

Training_loss 19.90847


 97%|█████████▋| 1941/2000 [43:39<01:00,  1.02s/it]

Training_loss 19.90685


 97%|█████████▋| 1942/2000 [43:41<01:02,  1.07s/it]

Training_loss 19.94050


 97%|█████████▋| 1943/2000 [43:42<01:12,  1.27s/it]

Training_loss 19.97049


 97%|█████████▋| 1944/2000 [43:44<01:19,  1.41s/it]

Training_loss 20.00872


 97%|█████████▋| 1945/2000 [43:47<01:44,  1.90s/it]

Training_loss 20.01630


 97%|█████████▋| 1946/2000 [43:49<01:45,  1.95s/it]

Training_loss 19.98313


 97%|█████████▋| 1947/2000 [43:51<01:34,  1.79s/it]

Training_loss 19.96654


 97%|█████████▋| 1948/2000 [43:52<01:33,  1.80s/it]

Training_loss 19.93325


 97%|█████████▋| 1949/2000 [43:54<01:32,  1.81s/it]

Training_loss 19.95409


 98%|█████████▊| 1950/2000 [43:56<01:22,  1.64s/it]

Training_loss 20.00087


 98%|█████████▊| 1951/2000 [43:57<01:11,  1.46s/it]

Training_loss 20.06630


 98%|█████████▊| 1952/2000 [43:58<01:04,  1.35s/it]

Training_loss 20.06829


 98%|█████████▊| 1953/2000 [43:59<01:02,  1.34s/it]

Training_loss 20.07324


 98%|█████████▊| 1954/2000 [44:00<01:00,  1.32s/it]

Training_loss 20.04665


 98%|█████████▊| 1955/2000 [44:02<00:59,  1.32s/it]

Training_loss 20.05063


 98%|█████████▊| 1956/2000 [44:03<00:58,  1.33s/it]

Training_loss 20.06276


 98%|█████████▊| 1957/2000 [44:04<00:55,  1.28s/it]

Training_loss 20.04396


 98%|█████████▊| 1958/2000 [44:05<00:51,  1.23s/it]

Training_loss 20.04978


 98%|█████████▊| 1959/2000 [44:06<00:49,  1.21s/it]

Training_loss 19.99769


 98%|█████████▊| 1960/2000 [44:08<00:48,  1.21s/it]

Training_loss 20.02168


 98%|█████████▊| 1961/2000 [44:09<00:47,  1.22s/it]

Training_loss 20.04307


 98%|█████████▊| 1962/2000 [44:10<00:47,  1.25s/it]

Training_loss 20.04635


 98%|█████████▊| 1963/2000 [44:11<00:45,  1.22s/it]

Training_loss 20.01467


 98%|█████████▊| 1964/2000 [44:13<00:43,  1.21s/it]

Training_loss 20.05099


 98%|█████████▊| 1965/2000 [44:14<00:41,  1.19s/it]

Training_loss 20.03380


 98%|█████████▊| 1966/2000 [44:15<00:45,  1.33s/it]

Training_loss 20.07165


 98%|█████████▊| 1967/2000 [44:17<00:42,  1.30s/it]

Training_loss 20.06289


 98%|█████████▊| 1968/2000 [44:18<00:43,  1.35s/it]

Training_loss 20.06145


 98%|█████████▊| 1969/2000 [44:19<00:41,  1.35s/it]

Training_loss 20.09164


 98%|█████████▊| 1970/2000 [44:20<00:38,  1.28s/it]

Training_loss 20.11372


 99%|█████████▊| 1971/2000 [44:22<00:35,  1.22s/it]

Training_loss 20.07960


 99%|█████████▊| 1972/2000 [44:23<00:32,  1.17s/it]

Training_loss 20.05053


 99%|█████████▊| 1973/2000 [44:24<00:32,  1.20s/it]

Training_loss 20.10799


 99%|█████████▊| 1974/2000 [44:25<00:32,  1.24s/it]

Training_loss 20.13078


 99%|█████████▉| 1975/2000 [44:26<00:30,  1.23s/it]

Training_loss 20.11951


 99%|█████████▉| 1976/2000 [44:28<00:30,  1.27s/it]

Training_loss 20.11451


 99%|█████████▉| 1977/2000 [44:29<00:29,  1.29s/it]

Training_loss 20.18830


 99%|█████████▉| 1978/2000 [44:30<00:28,  1.29s/it]

Training_loss 20.17700


 99%|█████████▉| 1979/2000 [44:32<00:26,  1.26s/it]

Training_loss 20.11557


 99%|█████████▉| 1980/2000 [44:33<00:24,  1.23s/it]

Training_loss 20.12042


 99%|█████████▉| 1981/2000 [44:34<00:23,  1.25s/it]

Training_loss 20.14269


 99%|█████████▉| 1982/2000 [44:35<00:21,  1.22s/it]

Training_loss 20.11065


 99%|█████████▉| 1983/2000 [44:37<00:23,  1.37s/it]

Training_loss 20.07815


 99%|█████████▉| 1984/2000 [44:38<00:21,  1.37s/it]

Training_loss 20.07794


 99%|█████████▉| 1985/2000 [44:40<00:20,  1.35s/it]

Training_loss 20.08264


 99%|█████████▉| 1986/2000 [44:41<00:18,  1.31s/it]

Training_loss 20.08805


 99%|█████████▉| 1987/2000 [44:42<00:16,  1.29s/it]

Training_loss 20.05827


 99%|█████████▉| 1988/2000 [44:43<00:14,  1.25s/it]

Training_loss 20.07257


 99%|█████████▉| 1989/2000 [44:44<00:13,  1.23s/it]

Training_loss 20.12773


100%|█████████▉| 1990/2000 [44:46<00:12,  1.22s/it]

Training_loss 20.13635


100%|█████████▉| 1991/2000 [44:47<00:11,  1.24s/it]

Training_loss 20.10800


100%|█████████▉| 1992/2000 [44:48<00:10,  1.26s/it]

Training_loss 20.13447


100%|█████████▉| 1993/2000 [44:49<00:08,  1.27s/it]

Training_loss 20.13196


100%|█████████▉| 1994/2000 [44:51<00:07,  1.30s/it]

Training_loss 20.22488


100%|█████████▉| 1995/2000 [44:52<00:06,  1.29s/it]

Training_loss 20.27213


100%|█████████▉| 1996/2000 [44:54<00:05,  1.42s/it]

Training_loss 20.26451


100%|█████████▉| 1997/2000 [44:56<00:04,  1.54s/it]

Training_loss 20.27552


100%|█████████▉| 1998/2000 [44:58<00:03,  1.76s/it]

Training_loss 20.30001


100%|█████████▉| 1999/2000 [44:59<00:01,  1.65s/it]

Training_loss 20.29303


100%|██████████| 2000/2000 [45:00<00:00,  1.35s/it]

Training_loss 20.24063





In [21]:
#plot.plot(test_loss)
parameters_to_vector(models[19].parameters())

tensor([-0.0460,  1.1812,  0.4610,  0.4469,  0.2511, -0.2240,  0.1706,  0.3388,
        -0.6996,  0.2031,  0.8270, -0.9598, -0.0157,  0.6698, -0.2592,  0.6487,
         0.8470,  0.0588,  1.2591, -0.5529,  0.2962, -0.2062, -0.2245, -0.2269,
         0.0198,  0.7513,  0.3471, -0.3338,  0.7348, -0.5020,  0.3347, -0.0425,
        -0.2992, -0.1321,  0.2386,  0.4576,  2.3006,  2.1637,  2.1098,  1.4697],
       grad_fn=<CatBackward0>)

In [22]:
for j in G.neighbors(0):
    print(j)

1
3


In [23]:
parameters_to_vector(models[0].parameters())

tensor([-0.1450,  0.1424,  0.0127,  0.0371, -0.0330, -0.2232, -0.2274, -0.1364,
         0.2306,  0.5960, -1.3876, -0.0987,  0.0926,  0.2494, -0.0292,  0.4427,
        -0.3820, -0.5574, -0.1521, -0.8897,  0.8261, -0.1268,  0.0452, -0.7876,
         0.0793,  1.2068, -0.3966, -1.2764,  0.4059, -0.2673,  0.4005,  0.1259,
        -0.1594,  0.1021,  0.9017,  0.9445,  0.5037,  1.8917,  2.2124,  2.0141],
       grad_fn=<CatBackward0>)

In [24]:
projection_list[0]

[0,
 tensor([[1.0529, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 1.0529, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 1.0529,  ..., 0.0000, 0.0000, 0.0000],
         ...,
         [0.0000, 0.0000, 0.0000,  ..., 1.0529, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 1.0529, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 1.0529]]),
 0,
 tensor([[1.2510, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 1.2510, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 1.2510,  ..., 0.0000, 0.0000, 0.0000],
         ...,
         [0.0000, 0.0000, 0.0000,  ..., 1.2510, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 1.2510, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 1.2510]]),
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0]

In [25]:
projected_weights[0]

[0,
 tensor([ 0.1243, -0.1060, -0.2898,  0.3380,  0.2483,  0.0919,  0.2717,  0.1301,
         -0.3367, -0.0290,  0.3086,  0.1434,  0.3225, -0.0740,  0.1424,  0.2804,
         -0.2284, -0.3479,  0.1432,  0.2785,  0.2804,  0.1387, -0.0847, -0.1907,
         -0.3337, -0.1719,  0.3415, -0.2128, -0.0271,  0.3491, -0.3455, -0.1790,
          0.0926,  0.1035,  0.2593, -0.0221,  0.1437,  0.0775,  0.2320, -0.1122]),
 0,
 tensor([ 0.2555,  0.1692,  0.0845,  0.1881, -0.1884,  0.1152, -0.3656, -0.1480,
          0.2105, -0.0706, -0.2295,  0.1474, -0.2375,  0.0866,  0.0462,  0.0965,
          0.0970, -0.3092,  0.3336,  0.0669,  0.3328, -0.3561, -0.0212,  0.1061,
         -0.2490, -0.3698,  0.0536,  0.2303,  0.3627,  0.3105, -0.0190,  0.3036,
          0.3711, -0.1756,  0.3514, -0.2361, -0.3895,  0.0123,  0.1991,  0.4181]),
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0]

In [26]:
test_loss = np.array(test_loss)
total_rel_error = np.array(total_rel_error)

In [27]:
np.save( 'training_loss_dfedu' + str(eta).replace('.', '_') + '_pout' + str(pout).replace('.', '_'), test_loss)
#np.save('relative_error_dfedu' + str(eta).replace('.', '_'), total_rel_error)