In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Dataset
import copy
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import networkx as nx
from torch.nn.utils import parameters_to_vector, vector_to_parameters

In [2]:
# Graph implementation
def generate_graph(cluster_sizes=[100,100], pin=0.5, pout=0.01, seed=0):
    """Generate a random connected graph"""
    probs = np.array([[pin, pout],[pout, pin]])
    while True:
        g = nx.stochastic_block_model(cluster_sizes, probs)
        if nx.algorithms.components.is_connected(g):
            return g


cluster_sizes = [10, 10]
pin = 0.5
pout = 0.01
seed = 0
alpha = 1e-3
lamda = 1e-3
eta = 1e-2
mu = 1e-2
no_users = sum(cluster_sizes)
batch_size = 50
epochs = 1
it = 2000
G = generate_graph(cluster_sizes, pin, pout, seed)

#nx.draw(G, with_labels=True, node_size=100, alpha=1, linewidths=10)
#plt.show()

In [3]:
# Metropolis weights 
number_nodes = G.number_of_nodes()
weights = np.zeros([number_nodes, number_nodes])
for edge in G.edges():
  i, j = edge[0], edge[1]
  weights[i - 1][j - 1] = 1 / (1 + np.max([G.degree(i), G.degree(j)]))
  weights[j - 1][i - 1] = weights[i - 1][j - 1]

print(weights)

weights = weights + np.diag(1 - np.sum(weights, axis=0))

metropolis_weights = weights
print(metropolis_weights)


[[0.         0.11111111 0.         0.         0.14285714 0.
  0.         0.14285714 0.14285714 0.         0.         0.
  0.         0.         0.         0.         0.         0.11111111
  0.         0.14285714]
 [0.11111111 0.         0.11111111 0.11111111 0.11111111 0.11111111
  0.11111111 0.11111111 0.11111111 0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.        ]
 [0.         0.11111111 0.         0.125      0.125      0.
  0.125      0.         0.125      0.         0.         0.
  0.         0.         0.         0.         0.125      0.
  0.         0.125     ]
 [0.         0.11111111 0.125      0.         0.         0.16666667
  0.125      0.         0.14285714 0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.        ]
 [0.14285714 0.11111111 0.125      0.         0.         0.
  0.125      0.16666667 0.         0.         0.         0.
  0.         0.         0.         0

In [4]:
def degrees(A):
    """Return the degrees of each node of a graph from its adjacency matrix"""
    return np.sum(A, axis=0).reshape(A.shape[0], 1)

def node_degree(n, G):
    cnt = 0
    for i in G.neighbors(n):
        cnt += 1
    return cnt

def get_neighbors(n, G):
    neighbors_list = []
    for i in G.neighbors(n):
        neighbors_list.append(int(i))
    return neighbors_list

In [5]:
degree_list = [G.degree(i) for i in range(no_users)]
print(degree_list)

for i in G.neighbors(0):
    print(i)

[5, 6, 8, 7, 5, 5, 4, 7, 3, 6, 1, 5, 3, 4, 5, 3, 6, 6, 8, 3]
1
3
6
7
9


In [6]:
# Dataset partitioning
def random_split(X, y, n, seed):
    """Equally split data between n agents"""
    rng = np.random.default_rng(seed)
    perm = rng.permutation(y.size)
    X_split = np.array_split(X[perm], n)  #np.stack to keep as a np array
    y_split = np.array_split(y[perm], n)
    return X_split, y_split





X_train = np.load('X_train.npy')
X_test = np.load('X_test.npy')
y_train = np.load('y_train.npy')
y_test = np.load('y_test.npy')


X, y = random_split(X_train, y_train, no_users, 1234)

In [7]:
datapoints = {}
count = 0
W1 = np.array([2.0, 2.0, 3.0, 3.0])
W2 = np.array([-2.0, 2.0, 3.0, -3.0])
W3 = 2 * W1
W4 = 2  * W2
W = [W1, W2]
m = 200
n = 4

scaler = [1.0, -1.0]

noise_sd = 0.001
for i, cluster_size in enumerate(cluster_sizes):
    for j in range(cluster_size):
        features = np.random.normal(loc=0.0, scale=1.0, size=(m, n))
        label = np.dot(features, W[i ]) + np.random.normal(0,noise_sd)
        data = X[count]
        data[:, 0:4] *= scaler[i]
        datapoints[count] = {
                'features': data,
                'degree': node_degree(count, G),
                'label': y[count],
                'neighbors': get_neighbors(count, G),
                'exact_weights': torch.from_numpy(W[i])
            }
        count += 1

In [8]:
class MyDataset(Dataset):
    def __init__(self, data, targets, transform=None):
        self.data = torch.FloatTensor(data)
        self.targets = torch.FloatTensor(targets).unsqueeze(-1)
        
    def __getitem__(self, index):
        x = self.data[index]
        y = self.targets[index]

        return x, y
    
    def __len__(self):
        return len(self.data)


In [9]:
class MLP_Net(nn.Module):
    def __init__(self, user_id):
        super(MLP_Net, self).__init__()
        self.fc1 = nn.Linear(9, 4, bias=False)
        self.fc2 = nn.Linear(4, 1, bias=False)
        #self.fc3 = nn.Linear(200, 10)
        self.user_id = user_id

    def forward(self, x):
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        output = self.fc2(x)
        #output = self.fc3(x)
        return output

In [10]:
from typing import Iterable, Optional

def grads_to_vector(parameters: Iterable[torch.Tensor]) -> torch.Tensor:
    r"""Convert parameters to one vector

    Args:
        parameters (Iterable[Tensor]): an iterator of Tensors that are the
            parameters of a model.

    Returns:
        The parameters represented by a single vector
    """
    # Flag for the device where the parameter is located
    param_device = None

    vec = []
    for param in parameters:
        # Ensure the parameters are located in the same device
        param_device = param.grad

        vec.append(param_device.view(-1))
    return torch.cat(vec)

In [11]:
model = MLP_Net(user_id=0)

lr = 0.01

dataloader = DataLoader(MyDataset(datapoints[19]["features"], datapoints[19]["label"]), batch_size=50, shuffle=False)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
for i in range(100):
    for (x, y) in dataloader:
        criterion = nn.MSELoss()
        optimizer.zero_grad()
        yhat = model(x)
        print(y.size())
        print(yhat.size())
        loss = criterion(yhat, y)
        
        loss.backward()
        print(i, loss, grads_to_vector(model.parameters()), parameters_to_vector(model.parameters()))
        #optimizer.step()
        #new_model = parameters_to_vector(model.parameters()) - lr * grads_to_vector(model.parameters())
        #vector_to_parameters(parameters=model.parameters(), vec=new_model)
        #if i % 50 ==0:
            #lr *= 0.9
            

#parameters_to_vector(model.parameters())

torch.Size([50, 1])
torch.Size([50, 1])
0 tensor(48.5478, grad_fn=<MseLossBackward0>) tensor([-0.3470,  0.2523,  1.7713, -0.8372,  0.8539,  0.5348,  0.7210, -0.8715,
         0.0837,  0.7392, -0.5987,  0.5631, -0.9992,  0.8662,  0.7514,  0.9207,
         0.2095,  1.8827, -2.2354,  1.8028,  0.5331,  1.0187, -0.5971, -0.3320,
        -0.7662, -0.6368, -3.5024, -0.2368,  0.7241, -0.6128,  0.1144, -0.8239,
        -0.7873, -0.6590,  2.3783,  0.7408, -1.0362, -1.3526, -1.9344, -3.2982]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
0 tensor(53.2632, grad_fn=<MseLossBackwar

torch.Size([50, 1])
torch.Size([50, 1])
3 tensor(44.3275, grad_fn=<MseLossBackward0>) tensor([-1.4582,  1.1119,  2.0143, -1.0773,  0.6216,  0.5742,  0.6529,  0.8887,
        -0.5158,  0.4361, -0.1033,  0.5256, -1.1289,  1.1783,  1.1414,  1.2277,
         0.3584,  1.5712, -2.1064,  1.6151,  1.0772,  0.2488, -0.2068, -0.0297,
        -0.2447, -0.1095, -2.6379, -0.2745,  0.7910,  0.1867, -0.1392, -0.0426,
        -0.3023, -0.0393,  1.2926,  0.8497, -1.1144, -1.2262, -1.6318, -1.9447]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
3 tensor(72.6023, grad_fn=<MseLossBackwar

        -0.7221, -0.0857,  1.0538,  0.7574, -1.3895, -1.1931, -1.9930, -2.1967]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
6 tensor(54.2208, grad_fn=<MseLossBackward0>) tensor([-0.4398,  0.2416,  1.6615, -0.5253,  0.5632,  0.2384,  0.3826,  0.4706,
        -0.3337,  0.3259, -0.1812,  0.5993, -1.7076,  1.0305,  0.7566,  1.1189,
         1.7011,  1.0618, -2.3580,  2.0360,  0.9187, -0.1013,  0.7036,  1.1450,
         0.5807, -1.4447, -2.9616, -0.0066,  0.3231,  0.2716, -0.2311, -0.6477,
        -0.7541, -0.5339,  2.6250,  0.7899, -0.8014, -1.2834, -1.9725, -3.3706]) 

torch.Size([50, 1])
torch.Size([50, 1])
9 tensor(56.7572, grad_fn=<MseLossBackward0>) tensor([-0.5785,  0.2656,  3.3907, -0.2683,  0.1023,  0.1971, -0.1172,  0.2133,
        -0.0290,  0.8322, -0.5512,  0.3451, -1.1951,  0.8637,  0.8191,  1.0500,
         1.1310,  0.7502, -2.8864,  2.0864,  2.0721,  0.7380, -0.7417,  0.1730,
        -1.0250, -0.9708, -2.5493, -0.4815,  0.9874, -0.7138, -0.5699, -0.4095,
        -0.4800, -0.1218,  2.4522,  0.1586, -1.5514, -0.9161, -2.2711, -3.5592]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
9 tensor(54.9587, grad_fn=<MseLossBackwar

        -0.2735,  0.1847,  1.3872, -0.3643, -1.5396, -1.3106, -1.2146, -2.2533]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
12 tensor(45.6119, grad_fn=<MseLossBackward0>) tensor([-0.3424,  0.4211,  1.1742, -0.8456,  0.7764,  0.6515,  0.7106,  0.3070,
         0.4539,  0.3019, -0.2493,  0.6548, -1.8096,  1.6734,  1.1914,  1.7863,
         0.0902,  0.9624, -2.7431,  2.2244, -0.4050,  0.4287, -0.4894,  0.1019,
        -0.6103, -0.5838, -1.4356, -0.3633,  0.6598,  0.5126,  0.4282, -0.9356,
        -0.6974, -0.7994,  1.6460, -0.3275, -0.7987, -1.3402, -1.4499, -2.0401])

        -0.6974, -0.7994,  1.6460, -0.3275, -0.7987, -1.3402, -1.4499, -2.0401]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([4, 1])
torch.Size([4, 1])
15 tensor(39.3987, grad_fn=<MseLossBackward0>) tensor([-1.7608,  1.6497, -0.3491, -0.0519,  0.3156, -1.9920,  0.3433, -0.0591,
         0.1415,  1.0966, -1.0129,  0.7207,  0.0373, -0.4052,  0.9244, -0.3582,
        -0.2096,  2.2112, -1.6812,  1.5529, -1.1049, -0.0573,  0.6212, -1.4171,
         0.5491,  0.3213, -3.3901, -1.7472,  2.0221, -0.8522, -0.3703,  0.6180,
        -0.1737,  0.7782, -0.5592,  1.3382, -0.9008, -1.4498, -1.2327, -1.0667]) t

torch.Size([50, 1])
torch.Size([50, 1])
19 tensor(52.9532, grad_fn=<MseLossBackward0>) tensor([-0.0036, -0.0876,  1.4785, -0.4868,  0.1399, -0.8561,  0.1180,  0.5288,
        -0.0770,  1.1890, -0.8977,  0.6288, -1.6728,  1.1446,  1.2392,  1.1784,
         0.9960,  1.4934, -3.1282,  2.3670, -0.3376,  1.2944, -0.7661, -0.6377,
        -0.9672, -1.1636, -1.6647,  0.2956,  0.2395,  0.7125, -0.8863,  0.1634,
        -0.3413,  0.2162,  2.0005,  0.1332, -1.1790, -1.6021, -1.7005, -2.6349]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
19 tensor(46.8444, grad_fn=<MseLossBackw

22 tensor(44.3275, grad_fn=<MseLossBackward0>) tensor([-1.4582,  1.1119,  2.0143, -1.0773,  0.6216,  0.5742,  0.6529,  0.8887,
        -0.5158,  0.4361, -0.1033,  0.5256, -1.1289,  1.1783,  1.1414,  1.2277,
         0.3584,  1.5712, -2.1064,  1.6151,  1.0772,  0.2488, -0.2068, -0.0297,
        -0.2447, -0.1095, -2.6379, -0.2745,  0.7910,  0.1867, -0.1392, -0.0426,
        -0.3023, -0.0393,  1.2926,  0.8497, -1.1144, -1.2262, -1.6318, -1.9447]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
22 tensor(72.6023, grad_fn=<MseLossBackward0>) tensor([-0.2359, -0.3389,  4.5363

25 tensor(54.2208, grad_fn=<MseLossBackward0>) tensor([-0.4398,  0.2416,  1.6615, -0.5253,  0.5632,  0.2384,  0.3826,  0.4706,
        -0.3337,  0.3259, -0.1812,  0.5993, -1.7076,  1.0305,  0.7566,  1.1189,
         1.7011,  1.0618, -2.3580,  2.0360,  0.9187, -0.1013,  0.7036,  1.1450,
         0.5807, -1.4447, -2.9616, -0.0066,  0.3231,  0.2716, -0.2311, -0.6477,
        -0.7541, -0.5339,  2.6250,  0.7899, -0.8014, -1.2834, -1.9725, -3.3706]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
25 tensor(56.7572, grad_fn=<MseLossBackward0>) tensor([-0.5785,  0.2656,  3.3907

torch.Size([50, 1])
torch.Size([50, 1])
26 tensor(46.8444, grad_fn=<MseLossBackward0>) tensor([-0.6847,  0.4322,  2.9137, -0.3601, -0.4910,  0.0381, -0.6553,  0.5024,
        -0.6456,  0.7998, -0.6238,  0.7683, -2.0511,  1.5168,  1.2284,  1.6623,
         1.2386,  1.1638, -2.8854,  2.3294,  0.9668,  1.4833, -1.1850, -0.4777,
        -1.4092, -0.9460, -2.8588,  0.4911, -0.2834, -0.0360, -1.3643,  0.4974,
         0.5757,  0.7148,  1.8643,  0.9043, -1.5223, -1.5837, -2.1258, -2.1235]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
26 tensor(44.3275, grad_fn=<MseLossBackw

28 tensor(39.3987, grad_fn=<MseLossBackward0>) tensor([-1.7608,  1.6497, -0.3491, -0.0519,  0.3156, -1.9920,  0.3433, -0.0591,
         0.1415,  1.0966, -1.0129,  0.7207,  0.0373, -0.4052,  0.9244, -0.3582,
        -0.2096,  2.2112, -1.6812,  1.5529, -1.1049, -0.0573,  0.6212, -1.4171,
         0.5491,  0.3213, -3.3901, -1.7472,  2.0221, -0.8522, -0.3703,  0.6180,
        -0.1737,  0.7782, -0.5592,  1.3382, -0.9008, -1.4498, -1.2327, -1.0667]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
29 tensor(48.5478, grad_fn=<MseLossBackward0>) tensor([-0.3470,  0.2523,  1.7713

torch.Size([50, 1])
torch.Size([50, 1])
32 tensor(59.5352, grad_fn=<MseLossBackward0>) tensor([ 4.4581e-01, -6.2305e-01,  1.8682e+00,  3.8455e-02, -7.7630e-01,
        -1.5728e+00, -9.2286e-01,  5.1943e-01,  3.8892e-01, -3.9379e-02,
         4.0169e-01,  5.7737e-01, -1.9977e+00,  1.7382e+00,  1.3465e+00,
         1.5271e+00,  1.0047e+00,  1.7982e+00, -1.4762e+00,  7.1747e-01,
        -6.8187e-01,  9.0230e-01, -7.9495e-01, -6.4006e-01, -8.1657e-01,
        -1.8609e+00, -3.2403e+00, -1.0703e+00,  1.5178e+00,  6.4488e-01,
        -8.6450e-01, -1.9118e-02, -2.5757e-01, -1.0304e-01,  3.1014e+00,
         8.5843e-04, -1.4660e+00, -1.6253e+00, -1.3875e+00, -3.8590e+00]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        

35 tensor(47.5602, grad_fn=<MseLossBackward0>) tensor([-0.2632, -0.0716,  2.3448, -0.7451,  0.0638, -0.2418, -0.0732,  0.7007,
        -0.2045,  1.0157, -0.7044,  0.2885, -0.9572,  0.8589,  0.4627,  0.8170,
         0.5912,  2.0071, -2.3329,  1.6991,  0.8287,  0.6592, -0.5660,  0.0061,
        -0.7934, -0.7673, -3.1640, -0.2502,  0.5918, -0.2201, -0.2832, -0.1801,
        -0.7221, -0.0857,  1.0538,  0.7574, -1.3895, -1.1931, -1.9930, -2.1967]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
35 tensor(54.2208, grad_fn=<MseLossBackward0>) tensor([-0.4398,  0.2416,  1.6615

torch.Size([50, 1])
torch.Size([50, 1])
38 tensor(54.2208, grad_fn=<MseLossBackward0>) tensor([-0.4398,  0.2416,  1.6615, -0.5253,  0.5632,  0.2384,  0.3826,  0.4706,
        -0.3337,  0.3259, -0.1812,  0.5993, -1.7076,  1.0305,  0.7566,  1.1189,
         1.7011,  1.0618, -2.3580,  2.0360,  0.9187, -0.1013,  0.7036,  1.1450,
         0.5807, -1.4447, -2.9616, -0.0066,  0.3231,  0.2716, -0.2311, -0.6477,
        -0.7541, -0.5339,  2.6250,  0.7899, -0.8014, -1.2834, -1.9725, -3.3706]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
38 tensor(56.7572, grad_fn=<MseLossBackw

41 tensor(56.7572, grad_fn=<MseLossBackward0>) tensor([-0.5785,  0.2656,  3.3907, -0.2683,  0.1023,  0.1971, -0.1172,  0.2133,
        -0.0290,  0.8322, -0.5512,  0.3451, -1.1951,  0.8637,  0.8191,  1.0500,
         1.1310,  0.7502, -2.8864,  2.0864,  2.0721,  0.7380, -0.7417,  0.1730,
        -1.0250, -0.9708, -2.5493, -0.4815,  0.9874, -0.7138, -0.5699, -0.4095,
        -0.4800, -0.1218,  2.4522,  0.1586, -1.5514, -0.9161, -2.2711, -3.5592]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
41 tensor(54.9587, grad_fn=<MseLossBackward0>) tensor([-0.1086, -0.0142,  2.2422

torch.Size([50, 1])
torch.Size([50, 1])
44 tensor(45.6119, grad_fn=<MseLossBackward0>) tensor([-0.3424,  0.4211,  1.1742, -0.8456,  0.7764,  0.6515,  0.7106,  0.3070,
         0.4539,  0.3019, -0.2493,  0.6548, -1.8096,  1.6734,  1.1914,  1.7863,
         0.0902,  0.9624, -2.7431,  2.2244, -0.4050,  0.4287, -0.4894,  0.1019,
        -0.6103, -0.5838, -1.4356, -0.3633,  0.6598,  0.5126,  0.4282, -0.9356,
        -0.6974, -0.7994,  1.6460, -0.3275, -0.7987, -1.3402, -1.4499, -2.0401]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([4, 1])
torch.Size([4, 1])
44 tensor(39.3987, grad_fn=<MseLossBackwar

48 tensor(48.5478, grad_fn=<MseLossBackward0>) tensor([-0.3470,  0.2523,  1.7713, -0.8372,  0.8539,  0.5348,  0.7210, -0.8715,
         0.0837,  0.7392, -0.5987,  0.5631, -0.9992,  0.8662,  0.7514,  0.9207,
         0.2095,  1.8827, -2.2354,  1.8028,  0.5331,  1.0187, -0.5971, -0.3320,
        -0.7662, -0.6368, -3.5024, -0.2368,  0.7241, -0.6128,  0.1144, -0.8239,
        -0.7873, -0.6590,  2.3783,  0.7408, -1.0362, -1.3526, -1.9344, -3.2982]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
48 tensor(53.2632, grad_fn=<MseLossBackward0>) tensor([-1.1753,  0.5564,  3.6616

50 tensor(54.9587, grad_fn=<MseLossBackward0>) tensor([-0.1086, -0.0142,  2.2422, -0.5951,  0.7224,  0.5913,  0.6174, -0.6083,
         0.1164,  0.7156, -0.3369,  0.4894, -1.7996,  1.2586,  0.9207,  1.3520,
         1.7992,  2.4717, -2.5275,  1.8485,  0.4820,  1.3939, -0.3597, -0.1901,
        -0.5273, -2.3581, -3.3643,  0.0610,  0.4565, -0.3277, -0.6048, -0.4218,
        -0.3542, -0.2447,  3.0350,  1.1598, -1.0158, -1.7802, -2.1015, -3.8842]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
50 tensor(46.5483, grad_fn=<MseLossBackward0>) tensor([ 0.1418, -0.3696,  2.4748

        -0.7541, -0.5339,  2.6250,  0.7899, -0.8014, -1.2834, -1.9725, -3.3706]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
53 tensor(56.7572, grad_fn=<MseLossBackward0>) tensor([-0.5785,  0.2656,  3.3907, -0.2683,  0.1023,  0.1971, -0.1172,  0.2133,
        -0.0290,  0.8322, -0.5512,  0.3451, -1.1951,  0.8637,  0.8191,  1.0500,
         1.1310,  0.7502, -2.8864,  2.0864,  2.0721,  0.7380, -0.7417,  0.1730,
        -1.0250, -0.9708, -2.5493, -0.4815,  0.9874, -0.7138, -0.5699, -0.4095,
        -0.4800, -0.1218,  2.4522,  0.1586, -1.5514, -0.9161, -2.2711, -3.5592])

56 tensor(54.9587, grad_fn=<MseLossBackward0>) tensor([-0.1086, -0.0142,  2.2422, -0.5951,  0.7224,  0.5913,  0.6174, -0.6083,
         0.1164,  0.7156, -0.3369,  0.4894, -1.7996,  1.2586,  0.9207,  1.3520,
         1.7992,  2.4717, -2.5275,  1.8485,  0.4820,  1.3939, -0.3597, -0.1901,
        -0.5273, -2.3581, -3.3643,  0.0610,  0.4565, -0.3277, -0.6048, -0.4218,
        -0.3542, -0.2447,  3.0350,  1.1598, -1.0158, -1.7802, -2.1015, -3.8842]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
56 tensor(46.5483, grad_fn=<MseLossBackward0>) tensor([ 0.1418, -0.3696,  2.4748

59 tensor(44.3275, grad_fn=<MseLossBackward0>) tensor([-1.4582,  1.1119,  2.0143, -1.0773,  0.6216,  0.5742,  0.6529,  0.8887,
        -0.5158,  0.4361, -0.1033,  0.5256, -1.1289,  1.1783,  1.1414,  1.2277,
         0.3584,  1.5712, -2.1064,  1.6151,  1.0772,  0.2488, -0.2068, -0.0297,
        -0.2447, -0.1095, -2.6379, -0.2745,  0.7910,  0.1867, -0.1392, -0.0426,
        -0.3023, -0.0393,  1.2926,  0.8497, -1.1144, -1.2262, -1.6318, -1.9447]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
59 tensor(72.6023, grad_fn=<MseLossBackward0>) tensor([-0.2359, -0.3389,  4.5363

62 tensor(52.9532, grad_fn=<MseLossBackward0>) tensor([-0.0036, -0.0876,  1.4785, -0.4868,  0.1399, -0.8561,  0.1180,  0.5288,
        -0.0770,  1.1890, -0.8977,  0.6288, -1.6728,  1.1446,  1.2392,  1.1784,
         0.9960,  1.4934, -3.1282,  2.3670, -0.3376,  1.2944, -0.7661, -0.6377,
        -0.9672, -1.1636, -1.6647,  0.2956,  0.2395,  0.7125, -0.8863,  0.1634,
        -0.3413,  0.2162,  2.0005,  0.1332, -1.1790, -1.6021, -1.7005, -2.6349]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
62 tensor(46.8444, grad_fn=<MseLossBackward0>) tensor([-0.6847,  0.4322,  2.9137

torch.Size([50, 1])
torch.Size([50, 1])
63 tensor(54.2208, grad_fn=<MseLossBackward0>) tensor([-0.4398,  0.2416,  1.6615, -0.5253,  0.5632,  0.2384,  0.3826,  0.4706,
        -0.3337,  0.3259, -0.1812,  0.5993, -1.7076,  1.0305,  0.7566,  1.1189,
         1.7011,  1.0618, -2.3580,  2.0360,  0.9187, -0.1013,  0.7036,  1.1450,
         0.5807, -1.4447, -2.9616, -0.0066,  0.3231,  0.2716, -0.2311, -0.6477,
        -0.7541, -0.5339,  2.6250,  0.7899, -0.8014, -1.2834, -1.9725, -3.3706]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
63 tensor(56.7572, grad_fn=<MseLossBackw

65 tensor(52.9532, grad_fn=<MseLossBackward0>) tensor([-0.0036, -0.0876,  1.4785, -0.4868,  0.1399, -0.8561,  0.1180,  0.5288,
        -0.0770,  1.1890, -0.8977,  0.6288, -1.6728,  1.1446,  1.2392,  1.1784,
         0.9960,  1.4934, -3.1282,  2.3670, -0.3376,  1.2944, -0.7661, -0.6377,
        -0.9672, -1.1636, -1.6647,  0.2956,  0.2395,  0.7125, -0.8863,  0.1634,
        -0.3413,  0.2162,  2.0005,  0.1332, -1.1790, -1.6021, -1.7005, -2.6349]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
65 tensor(46.8444, grad_fn=<MseLossBackward0>) tensor([-0.6847,  0.4322,  2.9137

66 tensor(59.5352, grad_fn=<MseLossBackward0>) tensor([ 4.4581e-01, -6.2305e-01,  1.8682e+00,  3.8455e-02, -7.7630e-01,
        -1.5728e+00, -9.2286e-01,  5.1943e-01,  3.8892e-01, -3.9379e-02,
         4.0169e-01,  5.7737e-01, -1.9977e+00,  1.7382e+00,  1.3465e+00,
         1.5271e+00,  1.0047e+00,  1.7982e+00, -1.4762e+00,  7.1747e-01,
        -6.8187e-01,  9.0230e-01, -7.9495e-01, -6.4006e-01, -8.1657e-01,
        -1.8609e+00, -3.2403e+00, -1.0703e+00,  1.5178e+00,  6.4488e-01,
        -8.6450e-01, -1.9118e-02, -2.5757e-01, -1.0304e-01,  3.1014e+00,
         8.5843e-04, -1.4660e+00, -1.6253e+00, -1.3875e+00, -3.8590e+00]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4

torch.Size([50, 1])
torch.Size([50, 1])
67 tensor(59.5352, grad_fn=<MseLossBackward0>) tensor([ 4.4581e-01, -6.2305e-01,  1.8682e+00,  3.8455e-02, -7.7630e-01,
        -1.5728e+00, -9.2286e-01,  5.1943e-01,  3.8892e-01, -3.9379e-02,
         4.0169e-01,  5.7737e-01, -1.9977e+00,  1.7382e+00,  1.3465e+00,
         1.5271e+00,  1.0047e+00,  1.7982e+00, -1.4762e+00,  7.1747e-01,
        -6.8187e-01,  9.0230e-01, -7.9495e-01, -6.4006e-01, -8.1657e-01,
        -1.8609e+00, -3.2403e+00, -1.0703e+00,  1.5178e+00,  6.4488e-01,
        -8.6450e-01, -1.9118e-02, -2.5757e-01, -1.0304e-01,  3.1014e+00,
         8.5843e-04, -1.4660e+00, -1.6253e+00, -1.3875e+00, -3.8590e+00]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        

68 tensor(50.2281, grad_fn=<MseLossBackward0>) tensor([-0.9412,  0.5857,  2.2202, -1.4067,  1.3619,  1.4238,  1.2743, -1.0340,
        -0.8248,  0.0293, -0.0040,  0.6214, -0.8355,  0.8264,  0.3098,  0.6399,
         0.7131,  2.2353, -2.7911,  2.0029,  1.4411,  0.0292,  0.3450,  0.7483,
         0.2093, -1.1692, -2.7358,  0.2488,  0.3241, -0.7779,  0.6162, -0.9108,
        -1.3809, -0.9530,  2.2351,  2.0084, -0.9422, -1.2562, -2.1217, -3.3123]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
68 tensor(47.5602, grad_fn=<MseLossBackward0>) tensor([-0.2632, -0.0716,  2.3448

69 tensor(46.5483, grad_fn=<MseLossBackward0>) tensor([ 0.1418, -0.3696,  2.4748, -1.2439,  0.6396,  0.1705,  0.5132,  0.8398,
         0.5724,  0.2844, -0.3849,  1.7109, -1.3265,  0.7965,  0.8466,  0.9827,
         0.3630,  0.3937, -1.6078,  1.4240,  0.0834,  0.0313,  0.3962,  0.4987,
         0.0488,  0.0867, -1.4522, -0.6396,  0.8197,  0.6293, -0.7613, -0.0943,
        -0.2735,  0.1847,  1.3872, -0.3643, -1.5396, -1.3106, -1.2146, -2.2533]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
69 tensor(45.6119, grad_fn=<MseLossBackward0>) tensor([-0.3424,  0.4211,  1.1742

71 tensor(52.9532, grad_fn=<MseLossBackward0>) tensor([-0.0036, -0.0876,  1.4785, -0.4868,  0.1399, -0.8561,  0.1180,  0.5288,
        -0.0770,  1.1890, -0.8977,  0.6288, -1.6728,  1.1446,  1.2392,  1.1784,
         0.9960,  1.4934, -3.1282,  2.3670, -0.3376,  1.2944, -0.7661, -0.6377,
        -0.9672, -1.1636, -1.6647,  0.2956,  0.2395,  0.7125, -0.8863,  0.1634,
        -0.3413,  0.2162,  2.0005,  0.1332, -1.1790, -1.6021, -1.7005, -2.6349]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
71 tensor(46.8444, grad_fn=<MseLossBackward0>) tensor([-0.6847,  0.4322,  2.9137

        -0.4800, -0.1218,  2.4522,  0.1586, -1.5514, -0.9161, -2.2711, -3.5592]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
72 tensor(54.9587, grad_fn=<MseLossBackward0>) tensor([-0.1086, -0.0142,  2.2422, -0.5951,  0.7224,  0.5913,  0.6174, -0.6083,
         0.1164,  0.7156, -0.3369,  0.4894, -1.7996,  1.2586,  0.9207,  1.3520,
         1.7992,  2.4717, -2.5275,  1.8485,  0.4820,  1.3939, -0.3597, -0.1901,
        -0.5273, -2.3581, -3.3643,  0.0610,  0.4565, -0.3277, -0.6048, -0.4218,
        -0.3542, -0.2447,  3.0350,  1.1598, -1.0158, -1.7802, -2.1015, -3.8842])

        -0.6974, -0.7994,  1.6460, -0.3275, -0.7987, -1.3402, -1.4499, -2.0401]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([4, 1])
torch.Size([4, 1])
73 tensor(39.3987, grad_fn=<MseLossBackward0>) tensor([-1.7608,  1.6497, -0.3491, -0.0519,  0.3156, -1.9920,  0.3433, -0.0591,
         0.1415,  1.0966, -1.0129,  0.7207,  0.0373, -0.4052,  0.9244, -0.3582,
        -0.2096,  2.2112, -1.6812,  1.5529, -1.1049, -0.0573,  0.6212, -1.4171,
         0.5491,  0.3213, -3.3901, -1.7472,  2.0221, -0.8522, -0.3703,  0.6180,
        -0.1737,  0.7782, -0.5592,  1.3382, -0.9008, -1.4498, -1.2327, -1.0667]) t

75 tensor(52.9532, grad_fn=<MseLossBackward0>) tensor([-0.0036, -0.0876,  1.4785, -0.4868,  0.1399, -0.8561,  0.1180,  0.5288,
        -0.0770,  1.1890, -0.8977,  0.6288, -1.6728,  1.1446,  1.2392,  1.1784,
         0.9960,  1.4934, -3.1282,  2.3670, -0.3376,  1.2944, -0.7661, -0.6377,
        -0.9672, -1.1636, -1.6647,  0.2956,  0.2395,  0.7125, -0.8863,  0.1634,
        -0.3413,  0.2162,  2.0005,  0.1332, -1.1790, -1.6021, -1.7005, -2.6349]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
75 tensor(46.8444, grad_fn=<MseLossBackward0>) tensor([-0.6847,  0.4322,  2.9137

76 tensor(54.2208, grad_fn=<MseLossBackward0>) tensor([-0.4398,  0.2416,  1.6615, -0.5253,  0.5632,  0.2384,  0.3826,  0.4706,
        -0.3337,  0.3259, -0.1812,  0.5993, -1.7076,  1.0305,  0.7566,  1.1189,
         1.7011,  1.0618, -2.3580,  2.0360,  0.9187, -0.1013,  0.7036,  1.1450,
         0.5807, -1.4447, -2.9616, -0.0066,  0.3231,  0.2716, -0.2311, -0.6477,
        -0.7541, -0.5339,  2.6250,  0.7899, -0.8014, -1.2834, -1.9725, -3.3706]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
76 tensor(56.7572, grad_fn=<MseLossBackward0>) tensor([-0.5785,  0.2656,  3.3907

        -0.6974, -0.7994,  1.6460, -0.3275, -0.7987, -1.3402, -1.4499, -2.0401]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([4, 1])
torch.Size([4, 1])
77 tensor(39.3987, grad_fn=<MseLossBackward0>) tensor([-1.7608,  1.6497, -0.3491, -0.0519,  0.3156, -1.9920,  0.3433, -0.0591,
         0.1415,  1.0966, -1.0129,  0.7207,  0.0373, -0.4052,  0.9244, -0.3582,
        -0.2096,  2.2112, -1.6812,  1.5529, -1.1049, -0.0573,  0.6212, -1.4171,
         0.5491,  0.3213, -3.3901, -1.7472,  2.0221, -0.8522, -0.3703,  0.6180,
        -0.1737,  0.7782, -0.5592,  1.3382, -0.9008, -1.4498, -1.2327, -1.0667]) t

78 tensor(46.5483, grad_fn=<MseLossBackward0>) tensor([ 0.1418, -0.3696,  2.4748, -1.2439,  0.6396,  0.1705,  0.5132,  0.8398,
         0.5724,  0.2844, -0.3849,  1.7109, -1.3265,  0.7965,  0.8466,  0.9827,
         0.3630,  0.3937, -1.6078,  1.4240,  0.0834,  0.0313,  0.3962,  0.4987,
         0.0488,  0.0867, -1.4522, -0.6396,  0.8197,  0.6293, -0.7613, -0.0943,
        -0.2735,  0.1847,  1.3872, -0.3643, -1.5396, -1.3106, -1.2146, -2.2533]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
78 tensor(45.6119, grad_fn=<MseLossBackward0>) tensor([-0.3424,  0.4211,  1.1742

80 tensor(53.2632, grad_fn=<MseLossBackward0>) tensor([-1.1753,  0.5564,  3.6616, -0.6714,  0.6317,  0.9117,  0.5737, -0.2768,
        -1.5066,  0.9084, -0.3435, -0.0224, -1.0093,  0.6559,  0.5028,  0.8133,
         0.7903,  1.6084, -3.1146,  1.9305,  1.9763,  0.0634,  0.4610,  1.1265,
         0.2803, -1.0617, -3.1946,  0.2666,  0.3691, -1.1687, -0.3128, -0.5095,
        -0.7311, -0.4112,  2.5309,  0.9324, -1.2242, -0.9786, -2.4845, -3.5853]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
80 tensor(52.9532, grad_fn=<MseLossBackward0>) tensor([-0.0036, -0.0876,  1.4785

81 tensor(53.2632, grad_fn=<MseLossBackward0>) tensor([-1.1753,  0.5564,  3.6616, -0.6714,  0.6317,  0.9117,  0.5737, -0.2768,
        -1.5066,  0.9084, -0.3435, -0.0224, -1.0093,  0.6559,  0.5028,  0.8133,
         0.7903,  1.6084, -3.1146,  1.9305,  1.9763,  0.0634,  0.4610,  1.1265,
         0.2803, -1.0617, -3.1946,  0.2666,  0.3691, -1.1687, -0.3128, -0.5095,
        -0.7311, -0.4112,  2.5309,  0.9324, -1.2242, -0.9786, -2.4845, -3.5853]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
81 tensor(52.9532, grad_fn=<MseLossBackward0>) tensor([-0.0036, -0.0876,  1.4785

82 tensor(53.2632, grad_fn=<MseLossBackward0>) tensor([-1.1753,  0.5564,  3.6616, -0.6714,  0.6317,  0.9117,  0.5737, -0.2768,
        -1.5066,  0.9084, -0.3435, -0.0224, -1.0093,  0.6559,  0.5028,  0.8133,
         0.7903,  1.6084, -3.1146,  1.9305,  1.9763,  0.0634,  0.4610,  1.1265,
         0.2803, -1.0617, -3.1946,  0.2666,  0.3691, -1.1687, -0.3128, -0.5095,
        -0.7311, -0.4112,  2.5309,  0.9324, -1.2242, -0.9786, -2.4845, -3.5853]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
82 tensor(52.9532, grad_fn=<MseLossBackward0>) tensor([-0.0036, -0.0876,  1.4785

82 tensor(46.5483, grad_fn=<MseLossBackward0>) tensor([ 0.1418, -0.3696,  2.4748, -1.2439,  0.6396,  0.1705,  0.5132,  0.8398,
         0.5724,  0.2844, -0.3849,  1.7109, -1.3265,  0.7965,  0.8466,  0.9827,
         0.3630,  0.3937, -1.6078,  1.4240,  0.0834,  0.0313,  0.3962,  0.4987,
         0.0488,  0.0867, -1.4522, -0.6396,  0.8197,  0.6293, -0.7613, -0.0943,
        -0.2735,  0.1847,  1.3872, -0.3643, -1.5396, -1.3106, -1.2146, -2.2533]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
82 tensor(45.6119, grad_fn=<MseLossBackward0>) tensor([-0.3424,  0.4211,  1.1742

83 tensor(54.2208, grad_fn=<MseLossBackward0>) tensor([-0.4398,  0.2416,  1.6615, -0.5253,  0.5632,  0.2384,  0.3826,  0.4706,
        -0.3337,  0.3259, -0.1812,  0.5993, -1.7076,  1.0305,  0.7566,  1.1189,
         1.7011,  1.0618, -2.3580,  2.0360,  0.9187, -0.1013,  0.7036,  1.1450,
         0.5807, -1.4447, -2.9616, -0.0066,  0.3231,  0.2716, -0.2311, -0.6477,
        -0.7541, -0.5339,  2.6250,  0.7899, -0.8014, -1.2834, -1.9725, -3.3706]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
83 tensor(56.7572, grad_fn=<MseLossBackward0>) tensor([-0.5785,  0.2656,  3.3907

84 tensor(72.6023, grad_fn=<MseLossBackward0>) tensor([-0.2359, -0.3389,  4.5363, -0.8755,  0.8194,  1.2710,  0.1703, -1.0552,
         0.8812,  1.3354, -0.9945,  0.2760, -2.2619,  1.3436,  1.1108,  1.7730,
         2.2951,  1.8520, -3.1515,  2.1809,  1.0676,  2.1781, -0.7517, -0.4319,
        -1.4950, -3.9528, -2.5246,  0.1888,  0.3292, -1.4618, -0.8295, -0.5336,
        -0.8814, -0.0679,  3.9277, -0.1575, -1.9600, -1.7966, -2.6497, -4.9053]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
84 tensor(59.5352, grad_fn=<MseLossBackward0>) tensor([ 4.4581e-01, -6.2305e-01,

torch.Size([50, 1])
torch.Size([50, 1])
85 tensor(50.2281, grad_fn=<MseLossBackward0>) tensor([-0.9412,  0.5857,  2.2202, -1.4067,  1.3619,  1.4238,  1.2743, -1.0340,
        -0.8248,  0.0293, -0.0040,  0.6214, -0.8355,  0.8264,  0.3098,  0.6399,
         0.7131,  2.2353, -2.7911,  2.0029,  1.4411,  0.0292,  0.3450,  0.7483,
         0.2093, -1.1692, -2.7358,  0.2488,  0.3241, -0.7779,  0.6162, -0.9108,
        -1.3809, -0.9530,  2.2351,  2.0084, -0.9422, -1.2562, -2.1217, -3.3123]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
85 tensor(47.5602, grad_fn=<MseLossBackw

86 tensor(59.5352, grad_fn=<MseLossBackward0>) tensor([ 4.4581e-01, -6.2305e-01,  1.8682e+00,  3.8455e-02, -7.7630e-01,
        -1.5728e+00, -9.2286e-01,  5.1943e-01,  3.8892e-01, -3.9379e-02,
         4.0169e-01,  5.7737e-01, -1.9977e+00,  1.7382e+00,  1.3465e+00,
         1.5271e+00,  1.0047e+00,  1.7982e+00, -1.4762e+00,  7.1747e-01,
        -6.8187e-01,  9.0230e-01, -7.9495e-01, -6.4006e-01, -8.1657e-01,
        -1.8609e+00, -3.2403e+00, -1.0703e+00,  1.5178e+00,  6.4488e-01,
        -8.6450e-01, -1.9118e-02, -2.5757e-01, -1.0304e-01,  3.1014e+00,
         8.5843e-04, -1.4660e+00, -1.6253e+00, -1.3875e+00, -3.8590e+00]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4

87 tensor(50.2281, grad_fn=<MseLossBackward0>) tensor([-0.9412,  0.5857,  2.2202, -1.4067,  1.3619,  1.4238,  1.2743, -1.0340,
        -0.8248,  0.0293, -0.0040,  0.6214, -0.8355,  0.8264,  0.3098,  0.6399,
         0.7131,  2.2353, -2.7911,  2.0029,  1.4411,  0.0292,  0.3450,  0.7483,
         0.2093, -1.1692, -2.7358,  0.2488,  0.3241, -0.7779,  0.6162, -0.9108,
        -1.3809, -0.9530,  2.2351,  2.0084, -0.9422, -1.2562, -2.1217, -3.3123]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
87 tensor(47.5602, grad_fn=<MseLossBackward0>) tensor([-0.2632, -0.0716,  2.3448

88 tensor(59.5352, grad_fn=<MseLossBackward0>) tensor([ 4.4581e-01, -6.2305e-01,  1.8682e+00,  3.8455e-02, -7.7630e-01,
        -1.5728e+00, -9.2286e-01,  5.1943e-01,  3.8892e-01, -3.9379e-02,
         4.0169e-01,  5.7737e-01, -1.9977e+00,  1.7382e+00,  1.3465e+00,
         1.5271e+00,  1.0047e+00,  1.7982e+00, -1.4762e+00,  7.1747e-01,
        -6.8187e-01,  9.0230e-01, -7.9495e-01, -6.4006e-01, -8.1657e-01,
        -1.8609e+00, -3.2403e+00, -1.0703e+00,  1.5178e+00,  6.4488e-01,
        -8.6450e-01, -1.9118e-02, -2.5757e-01, -1.0304e-01,  3.1014e+00,
         8.5843e-04, -1.4660e+00, -1.6253e+00, -1.3875e+00, -3.8590e+00]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4

89 tensor(72.6023, grad_fn=<MseLossBackward0>) tensor([-0.2359, -0.3389,  4.5363, -0.8755,  0.8194,  1.2710,  0.1703, -1.0552,
         0.8812,  1.3354, -0.9945,  0.2760, -2.2619,  1.3436,  1.1108,  1.7730,
         2.2951,  1.8520, -3.1515,  2.1809,  1.0676,  2.1781, -0.7517, -0.4319,
        -1.4950, -3.9528, -2.5246,  0.1888,  0.3292, -1.4618, -0.8295, -0.5336,
        -0.8814, -0.0679,  3.9277, -0.1575, -1.9600, -1.7966, -2.6497, -4.9053]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
89 tensor(59.5352, grad_fn=<MseLossBackward0>) tensor([ 4.4581e-01, -6.2305e-01,

90 tensor(44.3275, grad_fn=<MseLossBackward0>) tensor([-1.4582,  1.1119,  2.0143, -1.0773,  0.6216,  0.5742,  0.6529,  0.8887,
        -0.5158,  0.4361, -0.1033,  0.5256, -1.1289,  1.1783,  1.1414,  1.2277,
         0.3584,  1.5712, -2.1064,  1.6151,  1.0772,  0.2488, -0.2068, -0.0297,
        -0.2447, -0.1095, -2.6379, -0.2745,  0.7910,  0.1867, -0.1392, -0.0426,
        -0.3023, -0.0393,  1.2926,  0.8497, -1.1144, -1.2262, -1.6318, -1.9447]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
90 tensor(72.6023, grad_fn=<MseLossBackward0>) tensor([-0.2359, -0.3389,  4.5363

torch.Size([50, 1])
torch.Size([50, 1])
91 tensor(44.3275, grad_fn=<MseLossBackward0>) tensor([-1.4582,  1.1119,  2.0143, -1.0773,  0.6216,  0.5742,  0.6529,  0.8887,
        -0.5158,  0.4361, -0.1033,  0.5256, -1.1289,  1.1783,  1.1414,  1.2277,
         0.3584,  1.5712, -2.1064,  1.6151,  1.0772,  0.2488, -0.2068, -0.0297,
        -0.2447, -0.1095, -2.6379, -0.2745,  0.7910,  0.1867, -0.1392, -0.0426,
        -0.3023, -0.0393,  1.2926,  0.8497, -1.1144, -1.2262, -1.6318, -1.9447]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
91 tensor(72.6023, grad_fn=<MseLossBackw

92 tensor(52.9532, grad_fn=<MseLossBackward0>) tensor([-0.0036, -0.0876,  1.4785, -0.4868,  0.1399, -0.8561,  0.1180,  0.5288,
        -0.0770,  1.1890, -0.8977,  0.6288, -1.6728,  1.1446,  1.2392,  1.1784,
         0.9960,  1.4934, -3.1282,  2.3670, -0.3376,  1.2944, -0.7661, -0.6377,
        -0.9672, -1.1636, -1.6647,  0.2956,  0.2395,  0.7125, -0.8863,  0.1634,
        -0.3413,  0.2162,  2.0005,  0.1332, -1.1790, -1.6021, -1.7005, -2.6349]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
92 tensor(46.8444, grad_fn=<MseLossBackward0>) tensor([-0.6847,  0.4322,  2.9137

93 tensor(48.5478, grad_fn=<MseLossBackward0>) tensor([-0.3470,  0.2523,  1.7713, -0.8372,  0.8539,  0.5348,  0.7210, -0.8715,
         0.0837,  0.7392, -0.5987,  0.5631, -0.9992,  0.8662,  0.7514,  0.9207,
         0.2095,  1.8827, -2.2354,  1.8028,  0.5331,  1.0187, -0.5971, -0.3320,
        -0.7662, -0.6368, -3.5024, -0.2368,  0.7241, -0.6128,  0.1144, -0.8239,
        -0.7873, -0.6590,  2.3783,  0.7408, -1.0362, -1.3526, -1.9344, -3.2982]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
93 tensor(53.2632, grad_fn=<MseLossBackward0>) tensor([-1.1753,  0.5564,  3.6616

93 tensor(45.6119, grad_fn=<MseLossBackward0>) tensor([-0.3424,  0.4211,  1.1742, -0.8456,  0.7764,  0.6515,  0.7106,  0.3070,
         0.4539,  0.3019, -0.2493,  0.6548, -1.8096,  1.6734,  1.1914,  1.7863,
         0.0902,  0.9624, -2.7431,  2.2244, -0.4050,  0.4287, -0.4894,  0.1019,
        -0.6103, -0.5838, -1.4356, -0.3633,  0.6598,  0.5126,  0.4282, -0.9356,
        -0.6974, -0.7994,  1.6460, -0.3275, -0.7987, -1.3402, -1.4499, -2.0401]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([4, 1])
torch.Size([4, 1])
93 tensor(39.3987, grad_fn=<MseLossBackward0>) tensor([-1.7608,  1.6497, -0.3491, 

94 tensor(54.2208, grad_fn=<MseLossBackward0>) tensor([-0.4398,  0.2416,  1.6615, -0.5253,  0.5632,  0.2384,  0.3826,  0.4706,
        -0.3337,  0.3259, -0.1812,  0.5993, -1.7076,  1.0305,  0.7566,  1.1189,
         1.7011,  1.0618, -2.3580,  2.0360,  0.9187, -0.1013,  0.7036,  1.1450,
         0.5807, -1.4447, -2.9616, -0.0066,  0.3231,  0.2716, -0.2311, -0.6477,
        -0.7541, -0.5339,  2.6250,  0.7899, -0.8014, -1.2834, -1.9725, -3.3706]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
94 tensor(56.7572, grad_fn=<MseLossBackward0>) tensor([-0.5785,  0.2656,  3.3907

torch.Size([50, 1])
95 tensor(50.2281, grad_fn=<MseLossBackward0>) tensor([-0.9412,  0.5857,  2.2202, -1.4067,  1.3619,  1.4238,  1.2743, -1.0340,
        -0.8248,  0.0293, -0.0040,  0.6214, -0.8355,  0.8264,  0.3098,  0.6399,
         0.7131,  2.2353, -2.7911,  2.0029,  1.4411,  0.0292,  0.3450,  0.7483,
         0.2093, -1.1692, -2.7358,  0.2488,  0.3241, -0.7779,  0.6162, -0.9108,
        -1.3809, -0.9530,  2.2351,  2.0084, -0.9422, -1.2562, -2.1217, -3.3123]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
95 tensor(47.5602, grad_fn=<MseLossBackward0>) tensor([-0.26

         0.5757,  0.7148,  1.8643,  0.9043, -1.5223, -1.5837, -2.1258, -2.1235]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
96 tensor(44.3275, grad_fn=<MseLossBackward0>) tensor([-1.4582,  1.1119,  2.0143, -1.0773,  0.6216,  0.5742,  0.6529,  0.8887,
        -0.5158,  0.4361, -0.1033,  0.5256, -1.1289,  1.1783,  1.1414,  1.2277,
         0.3584,  1.5712, -2.1064,  1.6151,  1.0772,  0.2488, -0.2068, -0.0297,
        -0.2447, -0.1095, -2.6379, -0.2745,  0.7910,  0.1867, -0.1392, -0.0426,
        -0.3023, -0.0393,  1.2926,  0.8497, -1.1144, -1.2262, -1.6318, -1.9447])

97 tensor(48.5478, grad_fn=<MseLossBackward0>) tensor([-0.3470,  0.2523,  1.7713, -0.8372,  0.8539,  0.5348,  0.7210, -0.8715,
         0.0837,  0.7392, -0.5987,  0.5631, -0.9992,  0.8662,  0.7514,  0.9207,
         0.2095,  1.8827, -2.2354,  1.8028,  0.5331,  1.0187, -0.5971, -0.3320,
        -0.7662, -0.6368, -3.5024, -0.2368,  0.7241, -0.6128,  0.1144, -0.8239,
        -0.7873, -0.6590,  2.3783,  0.7408, -1.0362, -1.3526, -1.9344, -3.2982]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
97 tensor(53.2632, grad_fn=<MseLossBackward0>) tensor([-1.1753,  0.5564,  3.6616

97 tensor(54.2208, grad_fn=<MseLossBackward0>) tensor([-0.4398,  0.2416,  1.6615, -0.5253,  0.5632,  0.2384,  0.3826,  0.4706,
        -0.3337,  0.3259, -0.1812,  0.5993, -1.7076,  1.0305,  0.7566,  1.1189,
         1.7011,  1.0618, -2.3580,  2.0360,  0.9187, -0.1013,  0.7036,  1.1450,
         0.5807, -1.4447, -2.9616, -0.0066,  0.3231,  0.2716, -0.2311, -0.6477,
        -0.7541, -0.5339,  2.6250,  0.7899, -0.8014, -1.2834, -1.9725, -3.3706]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
97 tensor(56.7572, grad_fn=<MseLossBackward0>) tensor([-0.5785,  0.2656,  3.3907

98 tensor(46.8444, grad_fn=<MseLossBackward0>) tensor([-0.6847,  0.4322,  2.9137, -0.3601, -0.4910,  0.0381, -0.6553,  0.5024,
        -0.6456,  0.7998, -0.6238,  0.7683, -2.0511,  1.5168,  1.2284,  1.6623,
         1.2386,  1.1638, -2.8854,  2.3294,  0.9668,  1.4833, -1.1850, -0.4777,
        -1.4092, -0.9460, -2.8588,  0.4911, -0.2834, -0.0360, -1.3643,  0.4974,
         0.5757,  0.7148,  1.8643,  0.9043, -1.5223, -1.5837, -2.1258, -2.1235]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
98 tensor(44.3275, grad_fn=<MseLossBackward0>) tensor([-1.4582,  1.1119,  2.0143

98 tensor(45.6119, grad_fn=<MseLossBackward0>) tensor([-0.3424,  0.4211,  1.1742, -0.8456,  0.7764,  0.6515,  0.7106,  0.3070,
         0.4539,  0.3019, -0.2493,  0.6548, -1.8096,  1.6734,  1.1914,  1.7863,
         0.0902,  0.9624, -2.7431,  2.2244, -0.4050,  0.4287, -0.4894,  0.1019,
        -0.6103, -0.5838, -1.4356, -0.3633,  0.6598,  0.5126,  0.4282, -0.9356,
        -0.6974, -0.7994,  1.6460, -0.3275, -0.7987, -1.3402, -1.4499, -2.0401]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([4, 1])
torch.Size([4, 1])
98 tensor(39.3987, grad_fn=<MseLossBackward0>) tensor([-1.7608,  1.6497, -0.3491, 

        -1.3809, -0.9530,  2.2351,  2.0084, -0.9422, -1.2562, -2.1217, -3.3123]) tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
99 tensor(47.5602, grad_fn=<MseLossBackward0>) tensor([-0.2632, -0.0716,  2.3448, -0.7451,  0.0638, -0.2418, -0.0732,  0.7007,
        -0.2045,  1.0157, -0.7044,  0.2885, -0.9572,  0.8589,  0.4627,  0.8170,
         0.5912,  2.0071, -2.3329,  1.6991,  0.8287,  0.6592, -0.5660,  0.0061,
        -0.7934, -0.7673, -3.1640, -0.2502,  0.5918, -0.2201, -0.2832, -0.1801,
        -0.7221, -0.0857,  1.0538,  0.7574, -1.3895, -1.1931, -1.9930, -2.1967])

In [12]:
parameters_to_vector(model.parameters())

tensor([-0.2370, -0.3320, -0.2170,  0.2886, -0.0827,  0.1951,  0.1779,  0.0325,
        -0.0576, -0.0589, -0.0842,  0.0782, -0.1832, -0.0889,  0.0903, -0.0148,
        -0.0175,  0.1231,  0.1867,  0.0070, -0.0676, -0.0030, -0.3038, -0.0391,
         0.3197,  0.0278,  0.1288,  0.1186,  0.2475, -0.0284, -0.3264, -0.2566,
        -0.2124,  0.0374,  0.2054,  0.0765,  0.4812, -0.3248,  0.4979, -0.3124],
       grad_fn=<CatBackward0>)

In [13]:
class CNN_Net(nn.Module):
    def __init__(self):
        super(CNN_Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5)
        self.pool = nn.MaxPool2d(2,2)
        self.dropout = nn.Dropout(p=0.2)
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.dropout(x)
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        output = self.fc2(x)
        return output

In [14]:
class ClientUpdate(object):
    def __init__(self, dataset, batchSize, alpha, lamda, epochs, projection_list, projected_weights):
        self.train_loader = DataLoader(MyDataset(dataset["features"], dataset["label"]), batch_size=batchSize, shuffle=True)
        #self.learning_rate = learning_rate
        self.epochs = epochs
        self.batchSize = batchSize

    def train(self, model):
        criterion = nn.MSELoss()
        optimizer = torch.optim.SGD(model.parameters(), lr=1e-3
                                   )

        e_loss = []
        for epoch in range(1, self.epochs+1):
            train_loss = 0
            model.train()
            for i, (data, labels) in zip(range(1), self.train_loader):
                data, labels = data, labels
                optimizer.zero_grad() 
                output = model(data)  
                loss = criterion(output, labels)
                #loss += mu/2 * torch.norm(client_param.data - server_param.data)**2
                loss.backward()
                #grads = grads_to_vector(model.parameters())
                optimizer.step()
                train_loss += loss.item()*data.size(0)
                
                

            train_loss = train_loss/self.batchSize#len(self.train_loader.dataset) 
            e_loss.append(train_loss)

        total_loss = e_loss#sum(e_loss)/len(e_loss)

        return model.state_dict(), total_loss

In [15]:
# Preparing projection matrices
models = [MLP_Net(user_id=i) for i in range(no_users)]
#temp = MLP_Net()
projection_list = []
projected_weights = []

def update_ProjWeight(projection_list, projected_weights, first_run=True):
    #projected_weights = []
    for i in range(no_users):
        neighbors_mat = []
        neighbors_weights = []
        for j in range(no_users):
            if j in G.neighbors(i):
                with torch.no_grad():
                    if first_run == True:
                        row, column = parameters_to_vector(models[j].parameters()).size()[0], parameters_to_vector(models[i].parameters()).size()[0]
                        mat = torch.zeros((row, column))
                        mat.fill_diagonal_(1.0 + 1.0 * float(np.random.randn(1)))
                        neighbors_mat.append(mat)
                        neighbors_weights.append(torch.matmul(mat, parameters_to_vector(models[j].parameters())))
                    else:
                        neighbors_weights.append(torch.matmul(projection_list[j][i], parameters_to_vector(models[j].parameters())))
            else:
                neighbors_mat.append(0)
                neighbors_weights.append(0)
        if first_run == True:
            projection_list.append(neighbors_mat)
        projected_weights.append(neighbors_weights)

update_ProjWeight(projection_list, projected_weights)



In [16]:
print(projection_list[0])

[0, tensor([[-0.3819,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000, -0.3819,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000, -0.3819,  ...,  0.0000,  0.0000,  0.0000],
        ...,
        [ 0.0000,  0.0000,  0.0000,  ..., -0.3819,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000, -0.3819,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000, -0.3819]]), 0, tensor([[-0.6528,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000, -0.6528,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000, -0.6528,  ...,  0.0000,  0.0000,  0.0000],
        ...,
        [ 0.0000,  0.0000,  0.0000,  ..., -0.6528,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000, -0.6528,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000, -0.6528]]), 0, 0, tensor([[1.0142, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 1.0142, 0.0000,  ..., 0.0000, 0.0000

In [17]:
def testing(model, dataset, bs, criterion): 
    test_loss = 0
    correct = 0
    test_loader = DataLoader(MyDataset(X_test, y_test), batch_size=bs)
    l = len(test_loader)
    model.eval()
    for data, labels in test_loader:
        data, labels = data, labels
        output = model(data)
        loss = criterion(output, labels)
        test_loss += loss.item()*data.size(0)
        #_, pred = torch.max(output, 1)
        #correct += pred.eq(labels.data.view_as(pred)).sum().item()
    
    test_loss /= len(test_loader.dataset)
    
    return test_loss

In [18]:
def rel_error(model):
    return (torch.norm(parameters_to_vector(model.parameters()) - datapoints[model.user_id]['exact_weights']) / torch.norm(datapoints[model.user_id]['exact_weights'])).detach()

In [19]:
model = MLP_Net(user_id=0)

from torch.nn.utils import parameters_to_vector, vector_to_parameters

with torch.no_grad():    
    params = parameters_to_vector(model.parameters())

    print(params)

params *= 2.

vector_to_parameters(parameters=model.parameters(), vec=params)

parameters_to_vector(model.parameters())





tensor([ 0.3126,  0.3167,  0.2975,  0.1243,  0.0321,  0.1757, -0.1198, -0.2944,
         0.1595, -0.2862, -0.1967, -0.1006,  0.1393,  0.3022,  0.1319, -0.2451,
        -0.1665, -0.2209,  0.1852, -0.1251,  0.2536,  0.1792,  0.0916, -0.2949,
        -0.2024, -0.0412,  0.3119,  0.3251, -0.3021, -0.3124, -0.2573,  0.2455,
         0.1160,  0.2282,  0.1826, -0.0095, -0.0324, -0.4335,  0.2317, -0.3184])


tensor([ 0.6251,  0.6333,  0.5951,  0.2485,  0.0643,  0.3514, -0.2397, -0.5888,
         0.3191, -0.5724, -0.3934, -0.2011,  0.2786,  0.6044,  0.2638, -0.4902,
        -0.3330, -0.4419,  0.3704, -0.2501,  0.5071,  0.3583,  0.1832, -0.5899,
        -0.4048, -0.0825,  0.6238,  0.6502, -0.6041, -0.6247, -0.5146,  0.4910,
         0.2321,  0.4564,  0.3653, -0.0190, -0.0649, -0.8671,  0.4634, -0.6369],
       grad_fn=<CatBackward0>)

In [20]:
#global_model = CNN_Net().cuda()
models = [MLP_Net(user_id=i) for i in range(no_users)]
dummy_models = [MLP_Net(user_id=i) for i in range(no_users)]

#model.load_state_dict(global_model.state_dict())

criterion = nn.MSELoss()


train_loss = []
test_loss = []
test_accuracy = []
total_rel_error = []

for curr_round in tqdm(range(1, it+1)):
    w, local_loss = [], []

    
    for i in range(no_users):
        dummy_models[i].load_state_dict(models[i].state_dict())
        local_update = ClientUpdate(dataset=datapoints[i], batchSize=batch_size, alpha=alpha, lamda=lamda, epochs=1, projection_list=projection_list, projected_weights=projected_weights)
        weights, loss = local_update.train(dummy_models[i])
        w.append(weights)
        local_loss.append(loss)
        models[i].load_state_dict(w[i])
        
    
    
    
    #Share and mix the local weights
    
    
    for i in range(no_users):
        weights = parameters_to_vector(dummy_models[i].parameters())
        mat_vec_sum = torch.zeros_like(weights)
        for j in G.neighbors(i):
            mat_vec_sum = torch.add(mat_vec_sum, parameters_to_vector(dummy_models[j].parameters()))
        
        new_weights = weights - mu * eta * (degree_list[i] * weights - mat_vec_sum)
        # Update real models
        vector_to_parameters(parameters=models[i].parameters(), vec=new_weights)
        
        
        
    
    




          
            

    local_test_acc = []
    local_test_loss = []
    
    user_rel_error = 0
    for k in range(no_users):
      
        g_loss = testing(models[i], datapoints[i], 50, criterion)
        local_test_loss.append(g_loss)
        #user_rel_error += rel_error(models[i])
    
    
        

    g_loss = sum(local_test_loss) / len(local_test_loss)
    #total_rel_error.append(user_rel_error / no_users)
    #g_accuracy = sum(local_test_acc) / len(local_test_acc)
    
    

    test_loss.append(g_loss)
    #test_accuracy.append(g_accuracy)
    print("Training_loss %2.5f"% (test_loss[-1]))

  0%|          | 1/2000 [00:02<1:13:20,  2.20s/it]

Training_loss 46.92816


  0%|          | 2/2000 [00:03<57:53,  1.74s/it]  

Training_loss 46.89336


  0%|          | 3/2000 [00:04<49:08,  1.48s/it]

Training_loss 46.87415


  0%|          | 4/2000 [00:05<43:12,  1.30s/it]

Training_loss 46.83373


  0%|          | 5/2000 [00:06<40:37,  1.22s/it]

Training_loss 46.80175


  0%|          | 6/2000 [00:09<53:55,  1.62s/it]

Training_loss 46.77290


  0%|          | 7/2000 [00:11<1:00:53,  1.83s/it]

Training_loss 46.73139


  0%|          | 8/2000 [00:14<1:11:22,  2.15s/it]

Training_loss 46.67996


  0%|          | 9/2000 [00:17<1:20:12,  2.42s/it]

Training_loss 46.63766


  0%|          | 10/2000 [00:19<1:13:30,  2.22s/it]

Training_loss 46.60817


  1%|          | 11/2000 [00:22<1:24:25,  2.55s/it]

Training_loss 46.57437


  1%|          | 12/2000 [00:24<1:18:51,  2.38s/it]

Training_loss 46.52774


  1%|          | 13/2000 [00:26<1:17:12,  2.33s/it]

Training_loss 46.50107


  1%|          | 14/2000 [00:29<1:23:09,  2.51s/it]

Training_loss 46.45217


  1%|          | 15/2000 [00:33<1:32:41,  2.80s/it]

Training_loss 46.40368


  1%|          | 16/2000 [00:36<1:34:23,  2.85s/it]

Training_loss 46.37079


  1%|          | 17/2000 [00:39<1:38:37,  2.98s/it]

Training_loss 46.33595


  1%|          | 18/2000 [00:42<1:39:56,  3.03s/it]

Training_loss 46.29679


  1%|          | 19/2000 [00:43<1:23:42,  2.54s/it]

Training_loss 46.26514


  1%|          | 20/2000 [00:45<1:14:54,  2.27s/it]

Training_loss 46.22992


  1%|          | 21/2000 [00:48<1:22:44,  2.51s/it]

Training_loss 46.18834


  1%|          | 22/2000 [00:51<1:29:39,  2.72s/it]

Training_loss 46.14842


  1%|          | 23/2000 [00:54<1:31:54,  2.79s/it]

Training_loss 46.09832


  1%|          | 24/2000 [00:57<1:34:10,  2.86s/it]

Training_loss 46.04487


  1%|▏         | 25/2000 [01:00<1:36:11,  2.92s/it]

Training_loss 45.99700


  1%|▏         | 26/2000 [01:02<1:27:33,  2.66s/it]

Training_loss 45.96131


  1%|▏         | 27/2000 [01:05<1:22:43,  2.52s/it]

Training_loss 45.92511


  1%|▏         | 28/2000 [01:08<1:27:29,  2.66s/it]

Training_loss 45.88585


  1%|▏         | 29/2000 [01:10<1:28:09,  2.68s/it]

Training_loss 45.84275


  2%|▏         | 30/2000 [01:13<1:31:43,  2.79s/it]

Training_loss 45.80328


  2%|▏         | 31/2000 [01:17<1:37:15,  2.96s/it]

Training_loss 45.75047


  2%|▏         | 32/2000 [01:20<1:44:10,  3.18s/it]

Training_loss 45.69909


  2%|▏         | 33/2000 [01:23<1:43:31,  3.16s/it]

Training_loss 45.66257


  2%|▏         | 34/2000 [01:26<1:41:24,  3.09s/it]

Training_loss 45.60326


  2%|▏         | 35/2000 [01:30<1:41:04,  3.09s/it]

Training_loss 45.54568


  2%|▏         | 36/2000 [01:33<1:41:50,  3.11s/it]

Training_loss 45.51028


  2%|▏         | 37/2000 [01:36<1:44:04,  3.18s/it]

Training_loss 45.46370


  2%|▏         | 38/2000 [01:39<1:42:20,  3.13s/it]

Training_loss 45.43126


  2%|▏         | 39/2000 [01:41<1:30:35,  2.77s/it]

Training_loss 45.36796


  2%|▏         | 40/2000 [01:44<1:32:03,  2.82s/it]

Training_loss 45.31461


  2%|▏         | 41/2000 [01:47<1:34:13,  2.89s/it]

Training_loss 45.26249


  2%|▏         | 42/2000 [01:50<1:38:05,  3.01s/it]

Training_loss 45.19005


  2%|▏         | 43/2000 [01:53<1:31:12,  2.80s/it]

Training_loss 45.13103


  2%|▏         | 44/2000 [01:55<1:32:30,  2.84s/it]

Training_loss 45.08618


  2%|▏         | 45/2000 [01:58<1:33:04,  2.86s/it]

Training_loss 45.03379


  2%|▏         | 46/2000 [02:01<1:32:32,  2.84s/it]

Training_loss 44.97574


  2%|▏         | 47/2000 [02:04<1:33:35,  2.88s/it]

Training_loss 44.92144


  2%|▏         | 48/2000 [02:07<1:31:20,  2.81s/it]

Training_loss 44.87714


  2%|▏         | 49/2000 [02:10<1:38:53,  3.04s/it]

Training_loss 44.84426


  2%|▎         | 50/2000 [02:13<1:37:00,  2.98s/it]

Training_loss 44.79744


  3%|▎         | 51/2000 [02:16<1:35:26,  2.94s/it]

Training_loss 44.72920


  3%|▎         | 52/2000 [02:19<1:37:01,  2.99s/it]

Training_loss 44.67622


  3%|▎         | 53/2000 [02:22<1:36:19,  2.97s/it]

Training_loss 44.61644


  3%|▎         | 54/2000 [02:26<1:41:54,  3.14s/it]

Training_loss 44.56977


  3%|▎         | 55/2000 [02:29<1:41:28,  3.13s/it]

Training_loss 44.52448


  3%|▎         | 56/2000 [02:33<1:57:01,  3.61s/it]

Training_loss 44.47082


  3%|▎         | 57/2000 [02:40<2:25:30,  4.49s/it]

Training_loss 44.42787


  3%|▎         | 58/2000 [02:42<2:03:19,  3.81s/it]

Training_loss 44.36210


  3%|▎         | 59/2000 [02:45<1:53:55,  3.52s/it]

Training_loss 44.29846


  3%|▎         | 60/2000 [02:46<1:32:32,  2.86s/it]

Training_loss 44.24908


  3%|▎         | 61/2000 [02:48<1:16:59,  2.38s/it]

Training_loss 44.16369


  3%|▎         | 62/2000 [02:49<1:08:38,  2.13s/it]

Training_loss 44.09444


  3%|▎         | 63/2000 [02:51<1:03:15,  1.96s/it]

Training_loss 44.04715


  3%|▎         | 64/2000 [02:52<1:00:03,  1.86s/it]

Training_loss 43.97512


  3%|▎         | 65/2000 [02:54<57:54,  1.80s/it]  

Training_loss 43.92633


  3%|▎         | 66/2000 [02:56<1:00:35,  1.88s/it]

Training_loss 43.86950


  3%|▎         | 67/2000 [02:58<59:40,  1.85s/it]  

Training_loss 43.82336


  3%|▎         | 68/2000 [02:59<55:45,  1.73s/it]

Training_loss 43.75612


  3%|▎         | 69/2000 [03:01<52:32,  1.63s/it]

Training_loss 43.70041


  4%|▎         | 70/2000 [03:02<50:31,  1.57s/it]

Training_loss 43.61297


  4%|▎         | 71/2000 [03:04<48:39,  1.51s/it]

Training_loss 43.53807


  4%|▎         | 72/2000 [03:05<47:06,  1.47s/it]

Training_loss 43.42573


  4%|▎         | 73/2000 [03:07<51:34,  1.61s/it]

Training_loss 43.35796


  4%|▎         | 74/2000 [03:09<59:09,  1.84s/it]

Training_loss 43.30864


  4%|▍         | 75/2000 [03:11<1:02:25,  1.95s/it]

Training_loss 43.23296


  4%|▍         | 76/2000 [03:13<1:00:30,  1.89s/it]

Training_loss 43.14590


  4%|▍         | 77/2000 [03:15<55:50,  1.74s/it]  

Training_loss 43.05717


  4%|▍         | 78/2000 [03:16<49:43,  1.55s/it]

Training_loss 42.99763


  4%|▍         | 79/2000 [03:17<48:27,  1.51s/it]

Training_loss 42.93356


  4%|▍         | 80/2000 [03:19<48:06,  1.50s/it]

Training_loss 42.87514


  4%|▍         | 81/2000 [03:20<48:42,  1.52s/it]

Training_loss 42.80650


  4%|▍         | 82/2000 [03:22<48:20,  1.51s/it]

Training_loss 42.75186


  4%|▍         | 83/2000 [03:23<48:41,  1.52s/it]

Training_loss 42.66527


  4%|▍         | 84/2000 [03:25<48:13,  1.51s/it]

Training_loss 42.58736


  4%|▍         | 85/2000 [03:26<47:17,  1.48s/it]

Training_loss 42.51509


  4%|▍         | 86/2000 [03:28<53:51,  1.69s/it]

Training_loss 42.43891


  4%|▍         | 87/2000 [03:30<55:34,  1.74s/it]

Training_loss 42.33850


  4%|▍         | 88/2000 [03:32<56:35,  1.78s/it]

Training_loss 42.24058


  4%|▍         | 89/2000 [03:34<56:40,  1.78s/it]

Training_loss 42.17344


  4%|▍         | 90/2000 [03:35<56:10,  1.76s/it]

Training_loss 42.09025


  5%|▍         | 91/2000 [03:37<54:14,  1.70s/it]

Training_loss 42.03189


  5%|▍         | 92/2000 [03:38<50:55,  1.60s/it]

Training_loss 41.97114


  5%|▍         | 93/2000 [03:40<49:02,  1.54s/it]

Training_loss 41.89135


  5%|▍         | 94/2000 [03:41<47:39,  1.50s/it]

Training_loss 41.79802


  5%|▍         | 95/2000 [03:43<47:59,  1.51s/it]

Training_loss 41.74070


  5%|▍         | 96/2000 [03:44<47:42,  1.50s/it]

Training_loss 41.67885


  5%|▍         | 97/2000 [03:46<46:52,  1.48s/it]

Training_loss 41.60627


  5%|▍         | 98/2000 [03:47<45:25,  1.43s/it]

Training_loss 41.53164


  5%|▍         | 99/2000 [03:49<46:40,  1.47s/it]

Training_loss 41.46337


  5%|▌         | 100/2000 [03:51<52:49,  1.67s/it]

Training_loss 41.37931


  5%|▌         | 101/2000 [03:53<54:19,  1.72s/it]

Training_loss 41.31589


  5%|▌         | 102/2000 [03:54<51:31,  1.63s/it]

Training_loss 41.23223


  5%|▌         | 103/2000 [03:55<47:46,  1.51s/it]

Training_loss 41.13581


  5%|▌         | 104/2000 [03:56<44:08,  1.40s/it]

Training_loss 41.01538


  5%|▌         | 105/2000 [03:57<41:47,  1.32s/it]

Training_loss 40.91115


  5%|▌         | 106/2000 [03:59<41:55,  1.33s/it]

Training_loss 40.83147


  5%|▌         | 107/2000 [04:00<43:10,  1.37s/it]

Training_loss 40.72977


  5%|▌         | 108/2000 [04:02<42:01,  1.33s/it]

Training_loss 40.67334


  5%|▌         | 109/2000 [04:03<40:10,  1.27s/it]

Training_loss 40.56613


  6%|▌         | 110/2000 [04:04<39:05,  1.24s/it]

Training_loss 40.48902


  6%|▌         | 111/2000 [04:05<38:45,  1.23s/it]

Training_loss 40.39440


  6%|▌         | 112/2000 [04:06<37:52,  1.20s/it]

Training_loss 40.29377


  6%|▌         | 113/2000 [04:07<37:27,  1.19s/it]

Training_loss 40.21408


  6%|▌         | 114/2000 [04:09<39:27,  1.26s/it]

Training_loss 40.13786


  6%|▌         | 115/2000 [04:10<39:15,  1.25s/it]

Training_loss 40.03506


  6%|▌         | 116/2000 [04:11<38:09,  1.22s/it]

Training_loss 39.91082


  6%|▌         | 117/2000 [04:12<37:23,  1.19s/it]

Training_loss 39.85241


  6%|▌         | 118/2000 [04:13<36:51,  1.17s/it]

Training_loss 39.76033


  6%|▌         | 119/2000 [04:15<38:41,  1.23s/it]

Training_loss 39.64707


  6%|▌         | 120/2000 [04:16<39:15,  1.25s/it]

Training_loss 39.57318


  6%|▌         | 121/2000 [04:17<38:39,  1.23s/it]

Training_loss 39.50490


  6%|▌         | 122/2000 [04:18<37:30,  1.20s/it]

Training_loss 39.38105


  6%|▌         | 123/2000 [04:20<39:51,  1.27s/it]

Training_loss 39.29497


  6%|▌         | 124/2000 [04:21<39:01,  1.25s/it]

Training_loss 39.23314


  6%|▋         | 125/2000 [04:22<38:43,  1.24s/it]

Training_loss 39.15741


  6%|▋         | 126/2000 [04:23<37:59,  1.22s/it]

Training_loss 39.07112


  6%|▋         | 127/2000 [04:25<37:10,  1.19s/it]

Training_loss 38.95651


  6%|▋         | 128/2000 [04:26<37:03,  1.19s/it]

Training_loss 38.88561


  6%|▋         | 129/2000 [04:27<36:43,  1.18s/it]

Training_loss 38.78371


  6%|▋         | 130/2000 [04:28<37:02,  1.19s/it]

Training_loss 38.67806


  7%|▋         | 131/2000 [04:29<35:58,  1.15s/it]

Training_loss 38.56443


  7%|▋         | 132/2000 [04:30<37:16,  1.20s/it]

Training_loss 38.47947


  7%|▋         | 133/2000 [04:32<39:26,  1.27s/it]

Training_loss 38.41286


  7%|▋         | 134/2000 [04:33<38:35,  1.24s/it]

Training_loss 38.29756


  7%|▋         | 135/2000 [04:34<38:37,  1.24s/it]

Training_loss 38.20647


  7%|▋         | 136/2000 [04:35<38:03,  1.22s/it]

Training_loss 38.08758


  7%|▋         | 137/2000 [04:37<37:18,  1.20s/it]

Training_loss 37.98480


  7%|▋         | 138/2000 [04:38<38:32,  1.24s/it]

Training_loss 37.89605


  7%|▋         | 139/2000 [04:39<37:28,  1.21s/it]

Training_loss 37.71890


  7%|▋         | 140/2000 [04:40<36:38,  1.18s/it]

Training_loss 37.60162


  7%|▋         | 141/2000 [04:41<37:29,  1.21s/it]

Training_loss 37.51771


  7%|▋         | 142/2000 [04:43<37:16,  1.20s/it]

Training_loss 37.41301


  7%|▋         | 143/2000 [04:44<36:33,  1.18s/it]

Training_loss 37.29466


  7%|▋         | 144/2000 [04:45<35:49,  1.16s/it]

Training_loss 37.21392


  7%|▋         | 145/2000 [04:46<35:27,  1.15s/it]

Training_loss 37.10591


  7%|▋         | 146/2000 [04:47<38:14,  1.24s/it]

Training_loss 37.03723


  7%|▋         | 147/2000 [04:49<37:38,  1.22s/it]

Training_loss 36.92178


  7%|▋         | 148/2000 [04:50<37:02,  1.20s/it]

Training_loss 36.82218


  7%|▋         | 149/2000 [04:51<37:04,  1.20s/it]

Training_loss 36.73403


  8%|▊         | 150/2000 [04:52<36:31,  1.18s/it]

Training_loss 36.64244


  8%|▊         | 151/2000 [04:53<35:59,  1.17s/it]

Training_loss 36.51905


  8%|▊         | 152/2000 [04:54<35:31,  1.15s/it]

Training_loss 36.39886


  8%|▊         | 153/2000 [04:55<34:34,  1.12s/it]

Training_loss 36.31153


  8%|▊         | 154/2000 [04:57<34:17,  1.11s/it]

Training_loss 36.20105


  8%|▊         | 155/2000 [04:58<33:42,  1.10s/it]

Training_loss 36.07348


  8%|▊         | 156/2000 [04:59<32:19,  1.05s/it]

Training_loss 35.95353


  8%|▊         | 157/2000 [05:00<32:14,  1.05s/it]

Training_loss 35.84013


  8%|▊         | 158/2000 [05:01<32:24,  1.06s/it]

Training_loss 35.74725


  8%|▊         | 159/2000 [05:02<31:34,  1.03s/it]

Training_loss 35.63382


  8%|▊         | 160/2000 [05:03<32:25,  1.06s/it]

Training_loss 35.53056


  8%|▊         | 161/2000 [05:04<33:09,  1.08s/it]

Training_loss 35.37803


  8%|▊         | 162/2000 [05:05<33:02,  1.08s/it]

Training_loss 35.21755


  8%|▊         | 163/2000 [05:06<32:15,  1.05s/it]

Training_loss 35.12842


  8%|▊         | 164/2000 [05:07<31:55,  1.04s/it]

Training_loss 35.03898


  8%|▊         | 165/2000 [05:08<32:35,  1.07s/it]

Training_loss 34.92212


  8%|▊         | 166/2000 [05:09<33:04,  1.08s/it]

Training_loss 34.79427


  8%|▊         | 167/2000 [05:10<32:22,  1.06s/it]

Training_loss 34.66550


  8%|▊         | 168/2000 [05:11<32:44,  1.07s/it]

Training_loss 34.55934


  8%|▊         | 169/2000 [05:12<32:06,  1.05s/it]

Training_loss 34.41114


  8%|▊         | 170/2000 [05:13<33:01,  1.08s/it]

Training_loss 34.25207


  9%|▊         | 171/2000 [05:15<32:36,  1.07s/it]

Training_loss 34.18723


  9%|▊         | 172/2000 [05:15<31:19,  1.03s/it]

Training_loss 34.10356


  9%|▊         | 173/2000 [05:17<31:32,  1.04s/it]

Training_loss 33.97308


  9%|▊         | 174/2000 [05:18<31:38,  1.04s/it]

Training_loss 33.86823


  9%|▉         | 175/2000 [05:19<32:28,  1.07s/it]

Training_loss 33.74492


  9%|▉         | 176/2000 [05:20<33:13,  1.09s/it]

Training_loss 33.68259


  9%|▉         | 177/2000 [05:21<36:18,  1.19s/it]

Training_loss 33.56530


  9%|▉         | 178/2000 [05:24<49:43,  1.64s/it]

Training_loss 33.47893


  9%|▉         | 179/2000 [05:29<1:17:42,  2.56s/it]

Training_loss 33.39370


  9%|▉         | 180/2000 [05:30<1:10:40,  2.33s/it]

Training_loss 33.26658


  9%|▉         | 181/2000 [05:32<1:01:27,  2.03s/it]

Training_loss 33.12658


  9%|▉         | 182/2000 [05:33<52:39,  1.74s/it]  

Training_loss 32.98079


  9%|▉         | 183/2000 [05:34<47:02,  1.55s/it]

Training_loss 32.88531


  9%|▉         | 184/2000 [05:35<44:20,  1.46s/it]

Training_loss 32.76956


  9%|▉         | 185/2000 [05:36<40:36,  1.34s/it]

Training_loss 32.65620


  9%|▉         | 186/2000 [05:37<37:31,  1.24s/it]

Training_loss 32.55825


  9%|▉         | 187/2000 [05:38<36:07,  1.20s/it]

Training_loss 32.46453


  9%|▉         | 188/2000 [05:39<34:30,  1.14s/it]

Training_loss 32.32791


  9%|▉         | 189/2000 [05:41<34:32,  1.14s/it]

Training_loss 32.24103


 10%|▉         | 190/2000 [05:42<37:27,  1.24s/it]

Training_loss 32.13449


 10%|▉         | 191/2000 [05:43<38:26,  1.27s/it]

Training_loss 32.05210


 10%|▉         | 192/2000 [05:45<38:59,  1.29s/it]

Training_loss 31.93367


 10%|▉         | 193/2000 [05:46<40:18,  1.34s/it]

Training_loss 31.81224


 10%|▉         | 194/2000 [05:48<41:42,  1.39s/it]

Training_loss 31.73036


 10%|▉         | 195/2000 [05:49<44:38,  1.48s/it]

Training_loss 31.61058


 10%|▉         | 196/2000 [05:51<43:36,  1.45s/it]

Training_loss 31.47044


 10%|▉         | 197/2000 [05:52<41:58,  1.40s/it]

Training_loss 31.38063


 10%|▉         | 198/2000 [05:53<40:02,  1.33s/it]

Training_loss 31.26765


 10%|▉         | 199/2000 [05:54<37:58,  1.27s/it]

Training_loss 31.13742


 10%|█         | 200/2000 [05:55<36:39,  1.22s/it]

Training_loss 31.04733


 10%|█         | 201/2000 [05:57<37:24,  1.25s/it]

Training_loss 30.90610


 10%|█         | 202/2000 [05:58<38:27,  1.28s/it]

Training_loss 30.78307


 10%|█         | 203/2000 [05:59<38:56,  1.30s/it]

Training_loss 30.69981


 10%|█         | 204/2000 [06:01<38:58,  1.30s/it]

Training_loss 30.58394


 10%|█         | 205/2000 [06:02<37:01,  1.24s/it]

Training_loss 30.48442


 10%|█         | 206/2000 [06:03<34:54,  1.17s/it]

Training_loss 30.39781


 10%|█         | 207/2000 [06:04<37:18,  1.25s/it]

Training_loss 30.28207


 10%|█         | 208/2000 [06:05<36:52,  1.23s/it]

Training_loss 30.16047


 10%|█         | 209/2000 [06:07<40:11,  1.35s/it]

Training_loss 30.06298


 10%|█         | 210/2000 [06:09<41:12,  1.38s/it]

Training_loss 29.96913


 11%|█         | 211/2000 [06:10<39:30,  1.33s/it]

Training_loss 29.86934


 11%|█         | 212/2000 [06:11<37:40,  1.26s/it]

Training_loss 29.72615


 11%|█         | 213/2000 [06:12<38:43,  1.30s/it]

Training_loss 29.62350


 11%|█         | 214/2000 [06:13<37:08,  1.25s/it]

Training_loss 29.51458


 11%|█         | 215/2000 [06:14<36:00,  1.21s/it]

Training_loss 29.42566


 11%|█         | 216/2000 [06:16<35:26,  1.19s/it]

Training_loss 29.32993


 11%|█         | 217/2000 [06:17<37:21,  1.26s/it]

Training_loss 29.22067


 11%|█         | 218/2000 [06:18<38:29,  1.30s/it]

Training_loss 29.10933


 11%|█         | 219/2000 [06:20<41:06,  1.38s/it]

Training_loss 29.00457


 11%|█         | 220/2000 [06:22<44:31,  1.50s/it]

Training_loss 28.88513


 11%|█         | 221/2000 [06:23<44:58,  1.52s/it]

Training_loss 28.78055


 11%|█         | 222/2000 [06:25<44:53,  1.52s/it]

Training_loss 28.66819


 11%|█         | 223/2000 [06:27<47:19,  1.60s/it]

Training_loss 28.59326


 11%|█         | 224/2000 [06:29<50:09,  1.69s/it]

Training_loss 28.51190


 11%|█▏        | 225/2000 [06:30<51:13,  1.73s/it]

Training_loss 28.39243


 11%|█▏        | 226/2000 [06:32<52:02,  1.76s/it]

Training_loss 28.30616


 11%|█▏        | 227/2000 [06:33<46:39,  1.58s/it]

Training_loss 28.19762


 11%|█▏        | 228/2000 [06:34<41:53,  1.42s/it]

Training_loss 28.14147


 11%|█▏        | 229/2000 [06:35<38:23,  1.30s/it]

Training_loss 28.01889


 12%|█▏        | 230/2000 [06:36<35:49,  1.21s/it]

Training_loss 27.90476


 12%|█▏        | 231/2000 [06:37<33:29,  1.14s/it]

Training_loss 27.76370


 12%|█▏        | 232/2000 [06:38<32:58,  1.12s/it]

Training_loss 27.68696


 12%|█▏        | 233/2000 [06:40<33:35,  1.14s/it]

Training_loss 27.58212


 12%|█▏        | 234/2000 [06:41<32:55,  1.12s/it]

Training_loss 27.47218


 12%|█▏        | 235/2000 [06:42<33:00,  1.12s/it]

Training_loss 27.38217


 12%|█▏        | 236/2000 [06:43<32:54,  1.12s/it]

Training_loss 27.28366


 12%|█▏        | 237/2000 [06:44<32:15,  1.10s/it]

Training_loss 27.19025


 12%|█▏        | 238/2000 [06:45<31:09,  1.06s/it]

Training_loss 27.06280


 12%|█▏        | 239/2000 [06:46<31:01,  1.06s/it]

Training_loss 26.95139


 12%|█▏        | 240/2000 [06:47<29:46,  1.01s/it]

Training_loss 26.80651


 12%|█▏        | 241/2000 [06:48<29:50,  1.02s/it]

Training_loss 26.70320


 12%|█▏        | 242/2000 [06:49<29:57,  1.02s/it]

Training_loss 26.57603


 12%|█▏        | 243/2000 [06:50<29:51,  1.02s/it]

Training_loss 26.48877


 12%|█▏        | 244/2000 [06:51<30:34,  1.04s/it]

Training_loss 26.37185


 12%|█▏        | 245/2000 [06:52<30:20,  1.04s/it]

Training_loss 26.28704


 12%|█▏        | 246/2000 [06:53<30:17,  1.04s/it]

Training_loss 26.16697


 12%|█▏        | 247/2000 [06:54<31:19,  1.07s/it]

Training_loss 26.06582


 12%|█▏        | 248/2000 [06:56<32:45,  1.12s/it]

Training_loss 25.96803


 12%|█▏        | 249/2000 [06:57<32:21,  1.11s/it]

Training_loss 25.85766


 12%|█▎        | 250/2000 [06:58<33:44,  1.16s/it]

Training_loss 25.75245


 13%|█▎        | 251/2000 [06:59<32:35,  1.12s/it]

Training_loss 25.65412


 13%|█▎        | 252/2000 [07:00<35:19,  1.21s/it]

Training_loss 25.55013


 13%|█▎        | 253/2000 [07:02<36:43,  1.26s/it]

Training_loss 25.46205


 13%|█▎        | 254/2000 [07:03<40:00,  1.37s/it]

Training_loss 25.36782


 13%|█▎        | 255/2000 [07:05<45:02,  1.55s/it]

Training_loss 25.27058


 13%|█▎        | 256/2000 [07:08<58:45,  2.02s/it]

Training_loss 25.16100


 13%|█▎        | 257/2000 [07:10<57:41,  1.99s/it]

Training_loss 25.08211


 13%|█▎        | 258/2000 [07:12<53:16,  1.84s/it]

Training_loss 24.98546


 13%|█▎        | 259/2000 [07:14<54:27,  1.88s/it]

Training_loss 24.87359


 13%|█▎        | 260/2000 [07:16<52:37,  1.81s/it]

Training_loss 24.79592


 13%|█▎        | 261/2000 [07:17<47:58,  1.66s/it]

Training_loss 24.68008


 13%|█▎        | 262/2000 [07:18<43:31,  1.50s/it]

Training_loss 24.59697


 13%|█▎        | 263/2000 [07:19<39:59,  1.38s/it]

Training_loss 24.49047


 13%|█▎        | 264/2000 [07:20<37:34,  1.30s/it]

Training_loss 24.40134


 13%|█▎        | 265/2000 [07:21<36:21,  1.26s/it]

Training_loss 24.32914


 13%|█▎        | 266/2000 [07:22<35:15,  1.22s/it]

Training_loss 24.26379


 13%|█▎        | 267/2000 [07:24<34:08,  1.18s/it]

Training_loss 24.17016


 13%|█▎        | 268/2000 [07:25<33:33,  1.16s/it]

Training_loss 24.12166


 13%|█▎        | 269/2000 [07:26<33:15,  1.15s/it]

Training_loss 24.00845


 14%|█▎        | 270/2000 [07:27<33:19,  1.16s/it]

Training_loss 23.92013


 14%|█▎        | 271/2000 [07:28<34:03,  1.18s/it]

Training_loss 23.84460


 14%|█▎        | 272/2000 [07:29<32:50,  1.14s/it]

Training_loss 23.75592


 14%|█▎        | 273/2000 [07:30<32:30,  1.13s/it]

Training_loss 23.67590


 14%|█▎        | 274/2000 [07:32<33:38,  1.17s/it]

Training_loss 23.57257


 14%|█▍        | 275/2000 [07:33<32:57,  1.15s/it]

Training_loss 23.47904


 14%|█▍        | 276/2000 [07:34<33:06,  1.15s/it]

Training_loss 23.36854


 14%|█▍        | 277/2000 [07:35<33:26,  1.16s/it]

Training_loss 23.29574


 14%|█▍        | 278/2000 [07:36<34:04,  1.19s/it]

Training_loss 23.17784


 14%|█▍        | 279/2000 [07:37<33:08,  1.16s/it]

Training_loss 23.10394


 14%|█▍        | 280/2000 [07:39<32:56,  1.15s/it]

Training_loss 23.01543


 14%|█▍        | 281/2000 [07:40<34:23,  1.20s/it]

Training_loss 22.94577


 14%|█▍        | 282/2000 [07:41<34:25,  1.20s/it]

Training_loss 22.82596


 14%|█▍        | 283/2000 [07:42<33:47,  1.18s/it]

Training_loss 22.73657


 14%|█▍        | 284/2000 [07:44<35:59,  1.26s/it]

Training_loss 22.66968


 14%|█▍        | 285/2000 [07:45<34:41,  1.21s/it]

Training_loss 22.56965


 14%|█▍        | 286/2000 [07:46<34:44,  1.22s/it]

Training_loss 22.48934


 14%|█▍        | 287/2000 [07:47<34:38,  1.21s/it]

Training_loss 22.39516


 14%|█▍        | 288/2000 [07:48<34:16,  1.20s/it]

Training_loss 22.35100


 14%|█▍        | 289/2000 [07:49<32:55,  1.15s/it]

Training_loss 22.27344


 14%|█▍        | 290/2000 [07:50<32:37,  1.15s/it]

Training_loss 22.20785


 15%|█▍        | 291/2000 [07:52<32:50,  1.15s/it]

Training_loss 22.14946


 15%|█▍        | 292/2000 [07:53<32:50,  1.15s/it]

Training_loss 22.08955


 15%|█▍        | 293/2000 [07:54<32:43,  1.15s/it]

Training_loss 21.99746


 15%|█▍        | 294/2000 [07:55<32:52,  1.16s/it]

Training_loss 21.91302


 15%|█▍        | 295/2000 [07:57<35:01,  1.23s/it]

Training_loss 21.84754


 15%|█▍        | 296/2000 [07:58<34:29,  1.21s/it]

Training_loss 21.76799


 15%|█▍        | 297/2000 [07:59<35:45,  1.26s/it]

Training_loss 21.69200


 15%|█▍        | 298/2000 [08:00<36:24,  1.28s/it]

Training_loss 21.63684


 15%|█▍        | 299/2000 [08:02<35:23,  1.25s/it]

Training_loss 21.58412


 15%|█▌        | 300/2000 [08:03<34:08,  1.20s/it]

Training_loss 21.48763


 15%|█▌        | 301/2000 [08:04<33:04,  1.17s/it]

Training_loss 21.44522


 15%|█▌        | 302/2000 [08:05<35:43,  1.26s/it]

Training_loss 21.37926


 15%|█▌        | 303/2000 [08:07<36:01,  1.27s/it]

Training_loss 21.25983


 15%|█▌        | 304/2000 [08:08<37:11,  1.32s/it]

Training_loss 21.17034


 15%|█▌        | 305/2000 [08:09<37:23,  1.32s/it]

Training_loss 21.10945


 15%|█▌        | 306/2000 [08:10<35:16,  1.25s/it]

Training_loss 20.99633


 15%|█▌        | 307/2000 [08:11<34:01,  1.21s/it]

Training_loss 20.90512


 15%|█▌        | 308/2000 [08:13<33:17,  1.18s/it]

Training_loss 20.84046


 15%|█▌        | 309/2000 [08:14<32:06,  1.14s/it]

Training_loss 20.76997


 16%|█▌        | 310/2000 [08:15<34:28,  1.22s/it]

Training_loss 20.69892


 16%|█▌        | 311/2000 [08:16<34:32,  1.23s/it]

Training_loss 20.65166


 16%|█▌        | 312/2000 [08:17<33:33,  1.19s/it]

Training_loss 20.56179


 16%|█▌        | 313/2000 [08:19<33:32,  1.19s/it]

Training_loss 20.50822


 16%|█▌        | 314/2000 [08:20<32:58,  1.17s/it]

Training_loss 20.46419


 16%|█▌        | 315/2000 [08:21<32:49,  1.17s/it]

Training_loss 20.37824


 16%|█▌        | 316/2000 [08:22<34:11,  1.22s/it]

Training_loss 20.34548


 16%|█▌        | 317/2000 [08:23<33:56,  1.21s/it]

Training_loss 20.31881


 16%|█▌        | 318/2000 [08:25<33:18,  1.19s/it]

Training_loss 20.28352


 16%|█▌        | 319/2000 [08:26<33:13,  1.19s/it]

Training_loss 20.24350


 16%|█▌        | 320/2000 [08:27<32:53,  1.17s/it]

Training_loss 20.18178


 16%|█▌        | 321/2000 [08:28<32:30,  1.16s/it]

Training_loss 20.09094


 16%|█▌        | 322/2000 [08:29<33:19,  1.19s/it]

Training_loss 20.02773


 16%|█▌        | 323/2000 [08:31<35:23,  1.27s/it]

Training_loss 19.97599


 16%|█▌        | 324/2000 [08:32<38:06,  1.36s/it]

Training_loss 19.89861


 16%|█▋        | 325/2000 [08:34<36:53,  1.32s/it]

Training_loss 19.86098


 16%|█▋        | 326/2000 [08:35<37:28,  1.34s/it]

Training_loss 19.80819


 16%|█▋        | 327/2000 [08:36<36:38,  1.31s/it]

Training_loss 19.76136


 16%|█▋        | 328/2000 [08:37<35:31,  1.27s/it]

Training_loss 19.69467


 16%|█▋        | 329/2000 [08:39<35:34,  1.28s/it]

Training_loss 19.64838


 16%|█▋        | 330/2000 [08:40<34:38,  1.24s/it]

Training_loss 19.61936


 17%|█▋        | 331/2000 [08:41<33:29,  1.20s/it]

Training_loss 19.57347


 17%|█▋        | 332/2000 [08:42<32:54,  1.18s/it]

Training_loss 19.52014


 17%|█▋        | 333/2000 [08:43<32:21,  1.16s/it]

Training_loss 19.47165


 17%|█▋        | 334/2000 [08:44<32:45,  1.18s/it]

Training_loss 19.42469


 17%|█▋        | 335/2000 [08:45<31:46,  1.14s/it]

Training_loss 19.34201


 17%|█▋        | 336/2000 [08:47<31:41,  1.14s/it]

Training_loss 19.29318


 17%|█▋        | 337/2000 [08:48<32:37,  1.18s/it]

Training_loss 19.24239


 17%|█▋        | 338/2000 [08:49<33:13,  1.20s/it]

Training_loss 19.17247


 17%|█▋        | 339/2000 [08:50<33:28,  1.21s/it]

Training_loss 19.15178


 17%|█▋        | 340/2000 [08:52<33:15,  1.20s/it]

Training_loss 19.13852


 17%|█▋        | 341/2000 [08:53<33:20,  1.21s/it]

Training_loss 19.09302


 17%|█▋        | 342/2000 [08:54<32:22,  1.17s/it]

Training_loss 19.05115


 17%|█▋        | 343/2000 [08:55<32:21,  1.17s/it]

Training_loss 19.00989


 17%|█▋        | 344/2000 [08:56<32:19,  1.17s/it]

Training_loss 18.94668


 17%|█▋        | 345/2000 [08:57<31:49,  1.15s/it]

Training_loss 18.89536


 17%|█▋        | 346/2000 [08:59<33:28,  1.21s/it]

Training_loss 18.87434


 17%|█▋        | 347/2000 [09:00<32:56,  1.20s/it]

Training_loss 18.81249


 17%|█▋        | 348/2000 [09:01<32:18,  1.17s/it]

Training_loss 18.76726


 17%|█▋        | 349/2000 [09:02<31:54,  1.16s/it]

Training_loss 18.71115


 18%|█▊        | 350/2000 [09:03<33:25,  1.22s/it]

Training_loss 18.67412


 18%|█▊        | 351/2000 [09:05<32:44,  1.19s/it]

Training_loss 18.64526


 18%|█▊        | 352/2000 [09:06<31:15,  1.14s/it]

Training_loss 18.60409


 18%|█▊        | 353/2000 [09:07<30:51,  1.12s/it]

Training_loss 18.55390


 18%|█▊        | 354/2000 [09:08<30:52,  1.13s/it]

Training_loss 18.51501


 18%|█▊        | 355/2000 [09:09<33:41,  1.23s/it]

Training_loss 18.48053


 18%|█▊        | 356/2000 [09:10<33:56,  1.24s/it]

Training_loss 18.44580


 18%|█▊        | 357/2000 [09:12<35:13,  1.29s/it]

Training_loss 18.40332


 18%|█▊        | 358/2000 [09:13<36:06,  1.32s/it]

Training_loss 18.37306


 18%|█▊        | 359/2000 [09:15<39:29,  1.44s/it]

Training_loss 18.34052


 18%|█▊        | 360/2000 [09:16<36:23,  1.33s/it]

Training_loss 18.28405


 18%|█▊        | 361/2000 [09:17<34:18,  1.26s/it]

Training_loss 18.24475


 18%|█▊        | 362/2000 [09:18<33:15,  1.22s/it]

Training_loss 18.20515


 18%|█▊        | 363/2000 [09:20<34:32,  1.27s/it]

Training_loss 18.19320


 18%|█▊        | 364/2000 [09:21<34:55,  1.28s/it]

Training_loss 18.16769


 18%|█▊        | 365/2000 [09:22<33:25,  1.23s/it]

Training_loss 18.10749


 18%|█▊        | 366/2000 [09:23<31:45,  1.17s/it]

Training_loss 18.07980


 18%|█▊        | 367/2000 [09:24<30:16,  1.11s/it]

Training_loss 18.03671


 18%|█▊        | 368/2000 [09:25<29:39,  1.09s/it]

Training_loss 18.00678


 18%|█▊        | 369/2000 [09:26<29:04,  1.07s/it]

Training_loss 17.98289


 18%|█▊        | 370/2000 [09:27<28:33,  1.05s/it]

Training_loss 17.95033


 19%|█▊        | 371/2000 [09:28<28:19,  1.04s/it]

Training_loss 17.91224


 19%|█▊        | 372/2000 [09:29<27:27,  1.01s/it]

Training_loss 17.85474


 19%|█▊        | 373/2000 [09:30<27:23,  1.01s/it]

Training_loss 17.84794


 19%|█▊        | 374/2000 [09:31<27:23,  1.01s/it]

Training_loss 17.83085


 19%|█▉        | 375/2000 [09:32<27:31,  1.02s/it]

Training_loss 17.79359


 19%|█▉        | 376/2000 [09:33<28:19,  1.05s/it]

Training_loss 17.77118


 19%|█▉        | 377/2000 [09:35<29:41,  1.10s/it]

Training_loss 17.73462


 19%|█▉        | 378/2000 [09:36<33:20,  1.23s/it]

Training_loss 17.71862


 19%|█▉        | 379/2000 [09:38<36:20,  1.34s/it]

Training_loss 17.68287


 19%|█▉        | 380/2000 [09:39<36:03,  1.34s/it]

Training_loss 17.66677


 19%|█▉        | 381/2000 [09:40<36:49,  1.36s/it]

Training_loss 17.64856


 19%|█▉        | 382/2000 [09:42<37:28,  1.39s/it]

Training_loss 17.61024


 19%|█▉        | 383/2000 [09:44<41:11,  1.53s/it]

Training_loss 17.58127


 19%|█▉        | 384/2000 [09:45<39:21,  1.46s/it]

Training_loss 17.56091


 19%|█▉        | 385/2000 [09:46<35:24,  1.32s/it]

Training_loss 17.51823


 19%|█▉        | 386/2000 [09:47<34:48,  1.29s/it]

Training_loss 17.49095


 19%|█▉        | 387/2000 [09:49<35:28,  1.32s/it]

Training_loss 17.45676


 19%|█▉        | 388/2000 [09:50<33:05,  1.23s/it]

Training_loss 17.42511


 19%|█▉        | 389/2000 [09:51<33:17,  1.24s/it]

Training_loss 17.38325


 20%|█▉        | 390/2000 [09:52<32:08,  1.20s/it]

Training_loss 17.34386


 20%|█▉        | 391/2000 [09:53<32:53,  1.23s/it]

Training_loss 17.31733


 20%|█▉        | 392/2000 [09:54<30:55,  1.15s/it]

Training_loss 17.30546


 20%|█▉        | 393/2000 [09:55<29:49,  1.11s/it]

Training_loss 17.28666


 20%|█▉        | 394/2000 [09:56<28:33,  1.07s/it]

Training_loss 17.27110


 20%|█▉        | 395/2000 [09:57<28:55,  1.08s/it]

Training_loss 17.25426


 20%|█▉        | 396/2000 [09:58<28:07,  1.05s/it]

Training_loss 17.24446


 20%|█▉        | 397/2000 [09:59<27:50,  1.04s/it]

Training_loss 17.23664


 20%|█▉        | 398/2000 [10:00<26:43,  1.00s/it]

Training_loss 17.21441


 20%|█▉        | 399/2000 [10:01<25:46,  1.04it/s]

Training_loss 17.18929


 20%|██        | 400/2000 [10:02<26:20,  1.01it/s]

Training_loss 17.15354


 20%|██        | 401/2000 [10:03<26:55,  1.01s/it]

Training_loss 17.12930


 20%|██        | 402/2000 [10:04<27:29,  1.03s/it]

Training_loss 17.11394


 20%|██        | 403/2000 [10:05<26:39,  1.00s/it]

Training_loss 17.10494


 20%|██        | 404/2000 [10:07<29:00,  1.09s/it]

Training_loss 17.07971


 20%|██        | 405/2000 [10:08<34:43,  1.31s/it]

Training_loss 17.05335


 20%|██        | 406/2000 [10:10<36:28,  1.37s/it]

Training_loss 17.03143


 20%|██        | 407/2000 [10:11<34:19,  1.29s/it]

Training_loss 17.01463


 20%|██        | 408/2000 [10:12<32:18,  1.22s/it]

Training_loss 16.97806


 20%|██        | 409/2000 [10:13<31:09,  1.17s/it]

Training_loss 16.97041


 20%|██        | 410/2000 [10:15<33:47,  1.28s/it]

Training_loss 16.94986


 21%|██        | 411/2000 [10:16<33:29,  1.26s/it]

Training_loss 16.93085


 21%|██        | 412/2000 [10:17<33:50,  1.28s/it]

Training_loss 16.92298


 21%|██        | 413/2000 [10:18<32:30,  1.23s/it]

Training_loss 16.90594


 21%|██        | 414/2000 [10:19<30:45,  1.16s/it]

Training_loss 16.89029


 21%|██        | 415/2000 [10:20<29:19,  1.11s/it]

Training_loss 16.86971


 21%|██        | 416/2000 [10:21<28:37,  1.08s/it]

Training_loss 16.84136


 21%|██        | 417/2000 [10:23<29:28,  1.12s/it]

Training_loss 16.82426


 21%|██        | 418/2000 [10:24<29:59,  1.14s/it]

Training_loss 16.79225


 21%|██        | 419/2000 [10:25<28:38,  1.09s/it]

Training_loss 16.77602


 21%|██        | 420/2000 [10:26<28:13,  1.07s/it]

Training_loss 16.75806


 21%|██        | 421/2000 [10:27<27:35,  1.05s/it]

Training_loss 16.76173


 21%|██        | 422/2000 [10:28<27:50,  1.06s/it]

Training_loss 16.74416


 21%|██        | 423/2000 [10:29<27:18,  1.04s/it]

Training_loss 16.74979


 21%|██        | 424/2000 [10:30<26:55,  1.02s/it]

Training_loss 16.75594


 21%|██▏       | 425/2000 [10:31<26:35,  1.01s/it]

Training_loss 16.73637


 21%|██▏       | 426/2000 [10:32<26:11,  1.00it/s]

Training_loss 16.69745


 21%|██▏       | 427/2000 [10:33<25:51,  1.01it/s]

Training_loss 16.68105


 21%|██▏       | 428/2000 [10:34<26:29,  1.01s/it]

Training_loss 16.66344


 21%|██▏       | 429/2000 [10:35<25:40,  1.02it/s]

Training_loss 16.65811


 22%|██▏       | 430/2000 [10:36<28:15,  1.08s/it]

Training_loss 16.64978


 22%|██▏       | 431/2000 [10:37<27:01,  1.03s/it]

Training_loss 16.63895


 22%|██▏       | 432/2000 [10:38<28:11,  1.08s/it]

Training_loss 16.64467


 22%|██▏       | 433/2000 [10:39<27:49,  1.07s/it]

Training_loss 16.62153


 22%|██▏       | 434/2000 [10:40<27:58,  1.07s/it]

Training_loss 16.59673


 22%|██▏       | 435/2000 [10:41<26:54,  1.03s/it]

Training_loss 16.58064


 22%|██▏       | 436/2000 [10:42<27:03,  1.04s/it]

Training_loss 16.56593


 22%|██▏       | 437/2000 [10:43<26:30,  1.02s/it]

Training_loss 16.54068


 22%|██▏       | 438/2000 [10:44<25:57,  1.00it/s]

Training_loss 16.53914


 22%|██▏       | 439/2000 [10:45<26:38,  1.02s/it]

Training_loss 16.52820


 22%|██▏       | 440/2000 [10:46<27:31,  1.06s/it]

Training_loss 16.49937


 22%|██▏       | 441/2000 [10:47<27:00,  1.04s/it]

Training_loss 16.47657


 22%|██▏       | 442/2000 [10:48<26:45,  1.03s/it]

Training_loss 16.46770


 22%|██▏       | 443/2000 [10:49<26:55,  1.04s/it]

Training_loss 16.44127


 22%|██▏       | 444/2000 [10:50<26:33,  1.02s/it]

Training_loss 16.41738


 22%|██▏       | 445/2000 [10:51<26:37,  1.03s/it]

Training_loss 16.39292


 22%|██▏       | 446/2000 [10:52<26:17,  1.02s/it]

Training_loss 16.37734


 22%|██▏       | 447/2000 [10:53<26:36,  1.03s/it]

Training_loss 16.35034


 22%|██▏       | 448/2000 [10:55<27:50,  1.08s/it]

Training_loss 16.33908


 22%|██▏       | 449/2000 [10:56<29:16,  1.13s/it]

Training_loss 16.32438


 22%|██▎       | 450/2000 [10:57<28:19,  1.10s/it]

Training_loss 16.31464


 23%|██▎       | 451/2000 [10:58<27:51,  1.08s/it]

Training_loss 16.28272


 23%|██▎       | 452/2000 [10:59<27:19,  1.06s/it]

Training_loss 16.27116


 23%|██▎       | 453/2000 [11:00<27:32,  1.07s/it]

Training_loss 16.25641


 23%|██▎       | 454/2000 [11:01<26:43,  1.04s/it]

Training_loss 16.26872


 23%|██▎       | 455/2000 [11:02<26:29,  1.03s/it]

Training_loss 16.25610


 23%|██▎       | 456/2000 [11:03<26:15,  1.02s/it]

Training_loss 16.24225


 23%|██▎       | 457/2000 [11:04<26:29,  1.03s/it]

Training_loss 16.21896


 23%|██▎       | 458/2000 [11:05<26:24,  1.03s/it]

Training_loss 16.21487


 23%|██▎       | 459/2000 [11:06<26:22,  1.03s/it]

Training_loss 16.21289


 23%|██▎       | 460/2000 [11:07<26:30,  1.03s/it]

Training_loss 16.21650


 23%|██▎       | 461/2000 [11:08<27:25,  1.07s/it]

Training_loss 16.20702


 23%|██▎       | 462/2000 [11:09<27:20,  1.07s/it]

Training_loss 16.20632


 23%|██▎       | 463/2000 [11:11<28:25,  1.11s/it]

Training_loss 16.19162


 23%|██▎       | 464/2000 [11:12<30:23,  1.19s/it]

Training_loss 16.18068


 23%|██▎       | 465/2000 [11:13<28:54,  1.13s/it]

Training_loss 16.16515


 23%|██▎       | 466/2000 [11:14<28:15,  1.10s/it]

Training_loss 16.17544


 23%|██▎       | 467/2000 [11:15<28:08,  1.10s/it]

Training_loss 16.16948


 23%|██▎       | 468/2000 [11:16<27:33,  1.08s/it]

Training_loss 16.18023


 23%|██▎       | 469/2000 [11:17<26:54,  1.05s/it]

Training_loss 16.15982


 24%|██▎       | 470/2000 [11:18<26:54,  1.05s/it]

Training_loss 16.15989


 24%|██▎       | 471/2000 [11:19<26:17,  1.03s/it]

Training_loss 16.14754


 24%|██▎       | 472/2000 [11:20<26:38,  1.05s/it]

Training_loss 16.13455


 24%|██▎       | 473/2000 [11:21<26:27,  1.04s/it]

Training_loss 16.13236


 24%|██▎       | 474/2000 [11:22<25:38,  1.01s/it]

Training_loss 16.13017


 24%|██▍       | 475/2000 [11:23<25:23,  1.00it/s]

Training_loss 16.11550


 24%|██▍       | 476/2000 [11:24<25:28,  1.00s/it]

Training_loss 16.09716


 24%|██▍       | 477/2000 [11:25<26:02,  1.03s/it]

Training_loss 16.08854


 24%|██▍       | 478/2000 [11:26<26:52,  1.06s/it]

Training_loss 16.08847


 24%|██▍       | 479/2000 [11:28<27:19,  1.08s/it]

Training_loss 16.07114


 24%|██▍       | 480/2000 [11:29<26:53,  1.06s/it]

Training_loss 16.05385


 24%|██▍       | 481/2000 [11:30<26:44,  1.06s/it]

Training_loss 16.05644


 24%|██▍       | 482/2000 [11:31<26:14,  1.04s/it]

Training_loss 16.06349


 24%|██▍       | 483/2000 [11:32<25:45,  1.02s/it]

Training_loss 16.05190


 24%|██▍       | 484/2000 [11:33<25:21,  1.00s/it]

Training_loss 16.04420


 24%|██▍       | 485/2000 [11:34<25:23,  1.01s/it]

Training_loss 16.02431


 24%|██▍       | 486/2000 [11:35<25:18,  1.00s/it]

Training_loss 16.02096


 24%|██▍       | 487/2000 [11:36<25:32,  1.01s/it]

Training_loss 16.01476


 24%|██▍       | 488/2000 [11:37<25:27,  1.01s/it]

Training_loss 16.00970


 24%|██▍       | 489/2000 [11:38<25:39,  1.02s/it]

Training_loss 16.01453


 24%|██▍       | 490/2000 [11:39<25:34,  1.02s/it]

Training_loss 16.00585


 25%|██▍       | 491/2000 [11:40<25:11,  1.00s/it]

Training_loss 16.00478


 25%|██▍       | 492/2000 [11:41<25:34,  1.02s/it]

Training_loss 15.98443


 25%|██▍       | 493/2000 [11:42<25:29,  1.02s/it]

Training_loss 15.98898


 25%|██▍       | 494/2000 [11:43<27:29,  1.10s/it]

Training_loss 15.97461


 25%|██▍       | 495/2000 [11:44<27:28,  1.10s/it]

Training_loss 15.98943


 25%|██▍       | 496/2000 [11:45<27:05,  1.08s/it]

Training_loss 16.00931


 25%|██▍       | 497/2000 [11:46<26:53,  1.07s/it]

Training_loss 16.00536


 25%|██▍       | 498/2000 [11:47<26:07,  1.04s/it]

Training_loss 16.02071


 25%|██▍       | 499/2000 [11:48<26:21,  1.05s/it]

Training_loss 16.03888


 25%|██▌       | 500/2000 [11:49<26:15,  1.05s/it]

Training_loss 16.02597


 25%|██▌       | 501/2000 [11:50<25:53,  1.04s/it]

Training_loss 16.00726


 25%|██▌       | 502/2000 [11:51<25:21,  1.02s/it]

Training_loss 15.99170


 25%|██▌       | 503/2000 [11:52<25:06,  1.01s/it]

Training_loss 15.98606


 25%|██▌       | 504/2000 [11:53<25:23,  1.02s/it]

Training_loss 15.98818


 25%|██▌       | 505/2000 [11:54<24:55,  1.00s/it]

Training_loss 15.97869


 25%|██▌       | 506/2000 [11:55<25:57,  1.04s/it]

Training_loss 15.95419


 25%|██▌       | 507/2000 [11:56<26:44,  1.07s/it]

Training_loss 15.93906


 25%|██▌       | 508/2000 [11:58<26:21,  1.06s/it]

Training_loss 15.93098


 25%|██▌       | 509/2000 [11:59<27:29,  1.11s/it]

Training_loss 15.91181


 26%|██▌       | 510/2000 [12:00<28:32,  1.15s/it]

Training_loss 15.94757


 26%|██▌       | 511/2000 [12:01<28:15,  1.14s/it]

Training_loss 15.93814


 26%|██▌       | 512/2000 [12:02<27:43,  1.12s/it]

Training_loss 15.91310


 26%|██▌       | 513/2000 [12:03<26:22,  1.06s/it]

Training_loss 15.89227


 26%|██▌       | 514/2000 [12:04<26:11,  1.06s/it]

Training_loss 15.88528


 26%|██▌       | 515/2000 [12:05<25:45,  1.04s/it]

Training_loss 15.89283


 26%|██▌       | 516/2000 [12:06<25:30,  1.03s/it]

Training_loss 15.88865


 26%|██▌       | 517/2000 [12:07<25:21,  1.03s/it]

Training_loss 15.88294


 26%|██▌       | 518/2000 [12:08<25:04,  1.02s/it]

Training_loss 15.87235


 26%|██▌       | 519/2000 [12:09<26:00,  1.05s/it]

Training_loss 15.87541


 26%|██▌       | 520/2000 [12:10<26:50,  1.09s/it]

Training_loss 15.85073


 26%|██▌       | 521/2000 [12:11<25:47,  1.05s/it]

Training_loss 15.83898


 26%|██▌       | 522/2000 [12:12<25:04,  1.02s/it]

Training_loss 15.82890


 26%|██▌       | 523/2000 [12:13<24:56,  1.01s/it]

Training_loss 15.80871


 26%|██▌       | 524/2000 [12:15<25:52,  1.05s/it]

Training_loss 15.80543


 26%|██▋       | 525/2000 [12:16<27:40,  1.13s/it]

Training_loss 15.79531


 26%|██▋       | 526/2000 [12:17<26:56,  1.10s/it]

Training_loss 15.79748


 26%|██▋       | 527/2000 [12:18<26:56,  1.10s/it]

Training_loss 15.80206


 26%|██▋       | 528/2000 [12:19<26:01,  1.06s/it]

Training_loss 15.78817


 26%|██▋       | 529/2000 [12:20<25:38,  1.05s/it]

Training_loss 15.77703


 26%|██▋       | 530/2000 [12:21<25:33,  1.04s/it]

Training_loss 15.77064


 27%|██▋       | 531/2000 [12:22<25:12,  1.03s/it]

Training_loss 15.75770


 27%|██▋       | 532/2000 [12:23<25:04,  1.02s/it]

Training_loss 15.74349


 27%|██▋       | 533/2000 [12:24<25:15,  1.03s/it]

Training_loss 15.73047


 27%|██▋       | 534/2000 [12:25<25:11,  1.03s/it]

Training_loss 15.71988


 27%|██▋       | 535/2000 [12:26<25:49,  1.06s/it]

Training_loss 15.70485


 27%|██▋       | 536/2000 [12:27<25:41,  1.05s/it]

Training_loss 15.68281


 27%|██▋       | 537/2000 [12:28<25:15,  1.04s/it]

Training_loss 15.67963


 27%|██▋       | 538/2000 [12:29<24:59,  1.03s/it]

Training_loss 15.68590


 27%|██▋       | 539/2000 [12:30<25:20,  1.04s/it]

Training_loss 15.68149


 27%|██▋       | 540/2000 [12:32<26:45,  1.10s/it]

Training_loss 15.69169


 27%|██▋       | 541/2000 [12:33<26:00,  1.07s/it]

Training_loss 15.70680


 27%|██▋       | 542/2000 [12:34<26:16,  1.08s/it]

Training_loss 15.70714


 27%|██▋       | 543/2000 [12:35<26:13,  1.08s/it]

Training_loss 15.71394


 27%|██▋       | 544/2000 [12:36<25:12,  1.04s/it]

Training_loss 15.69934


 27%|██▋       | 545/2000 [12:37<25:58,  1.07s/it]

Training_loss 15.68375


 27%|██▋       | 546/2000 [12:38<25:35,  1.06s/it]

Training_loss 15.66466


 27%|██▋       | 547/2000 [12:39<24:29,  1.01s/it]

Training_loss 15.66155


 27%|██▋       | 548/2000 [12:40<24:41,  1.02s/it]

Training_loss 15.64221


 27%|██▋       | 549/2000 [12:41<24:52,  1.03s/it]

Training_loss 15.64696


 28%|██▊       | 550/2000 [12:42<24:29,  1.01s/it]

Training_loss 15.65279


 28%|██▊       | 551/2000 [12:43<24:19,  1.01s/it]

Training_loss 15.64193


 28%|██▊       | 552/2000 [12:44<23:50,  1.01it/s]

Training_loss 15.63785


 28%|██▊       | 553/2000 [12:45<23:42,  1.02it/s]

Training_loss 15.61494


 28%|██▊       | 554/2000 [12:46<23:59,  1.00it/s]

Training_loss 15.61055


 28%|██▊       | 555/2000 [12:47<25:50,  1.07s/it]

Training_loss 15.60850


 28%|██▊       | 556/2000 [12:48<25:35,  1.06s/it]

Training_loss 15.57167


 28%|██▊       | 557/2000 [12:49<25:58,  1.08s/it]

Training_loss 15.58161


 28%|██▊       | 558/2000 [12:50<25:03,  1.04s/it]

Training_loss 15.56676


 28%|██▊       | 559/2000 [12:51<24:32,  1.02s/it]

Training_loss 15.57626


 28%|██▊       | 560/2000 [12:52<24:28,  1.02s/it]

Training_loss 15.57265


 28%|██▊       | 561/2000 [12:53<25:34,  1.07s/it]

Training_loss 15.57069


 28%|██▊       | 562/2000 [12:54<26:01,  1.09s/it]

Training_loss 15.55203


 28%|██▊       | 563/2000 [12:56<26:40,  1.11s/it]

Training_loss 15.55090


 28%|██▊       | 564/2000 [12:57<26:03,  1.09s/it]

Training_loss 15.55789


 28%|██▊       | 565/2000 [12:58<25:51,  1.08s/it]

Training_loss 15.53166


 28%|██▊       | 566/2000 [12:59<28:03,  1.17s/it]

Training_loss 15.53218


 28%|██▊       | 567/2000 [13:00<26:31,  1.11s/it]

Training_loss 15.54507


 28%|██▊       | 568/2000 [13:01<25:54,  1.09s/it]

Training_loss 15.54079


 28%|██▊       | 569/2000 [13:02<25:26,  1.07s/it]

Training_loss 15.56170


 28%|██▊       | 570/2000 [13:03<25:51,  1.08s/it]

Training_loss 15.56340


 29%|██▊       | 571/2000 [13:04<25:51,  1.09s/it]

Training_loss 15.56153


 29%|██▊       | 572/2000 [13:05<25:11,  1.06s/it]

Training_loss 15.55651


 29%|██▊       | 573/2000 [13:06<25:08,  1.06s/it]

Training_loss 15.53511


 29%|██▊       | 574/2000 [13:07<25:46,  1.08s/it]

Training_loss 15.52660


 29%|██▉       | 575/2000 [13:09<28:23,  1.20s/it]

Training_loss 15.51596


 29%|██▉       | 576/2000 [13:10<27:35,  1.16s/it]

Training_loss 15.51433


 29%|██▉       | 577/2000 [13:11<26:42,  1.13s/it]

Training_loss 15.50753


 29%|██▉       | 578/2000 [13:12<26:01,  1.10s/it]

Training_loss 15.50347


 29%|██▉       | 579/2000 [13:13<25:53,  1.09s/it]

Training_loss 15.50843


 29%|██▉       | 580/2000 [13:14<25:31,  1.08s/it]

Training_loss 15.49630


 29%|██▉       | 581/2000 [13:15<24:57,  1.06s/it]

Training_loss 15.49443


 29%|██▉       | 582/2000 [13:16<25:07,  1.06s/it]

Training_loss 15.48464


 29%|██▉       | 583/2000 [13:17<24:14,  1.03s/it]

Training_loss 15.48292


 29%|██▉       | 584/2000 [13:18<24:58,  1.06s/it]

Training_loss 15.48618


 29%|██▉       | 585/2000 [13:20<26:17,  1.11s/it]

Training_loss 15.48872


 29%|██▉       | 586/2000 [13:21<26:00,  1.10s/it]

Training_loss 15.50105


 29%|██▉       | 587/2000 [13:22<24:58,  1.06s/it]

Training_loss 15.49062


 29%|██▉       | 588/2000 [13:23<23:58,  1.02s/it]

Training_loss 15.48689


 29%|██▉       | 589/2000 [13:24<24:10,  1.03s/it]

Training_loss 15.49782


 30%|██▉       | 590/2000 [13:25<23:38,  1.01s/it]

Training_loss 15.49763


 30%|██▉       | 591/2000 [13:26<23:30,  1.00s/it]

Training_loss 15.50213


 30%|██▉       | 592/2000 [13:27<23:53,  1.02s/it]

Training_loss 15.49046


 30%|██▉       | 593/2000 [13:28<23:05,  1.02it/s]

Training_loss 15.48113


 30%|██▉       | 594/2000 [13:29<24:04,  1.03s/it]

Training_loss 15.45978


 30%|██▉       | 595/2000 [13:30<24:38,  1.05s/it]

Training_loss 15.46435


 30%|██▉       | 596/2000 [13:31<24:09,  1.03s/it]

Training_loss 15.44955


 30%|██▉       | 597/2000 [13:32<23:52,  1.02s/it]

Training_loss 15.44628


 30%|██▉       | 598/2000 [13:33<23:44,  1.02s/it]

Training_loss 15.46352


 30%|██▉       | 599/2000 [13:34<24:40,  1.06s/it]

Training_loss 15.46244


 30%|███       | 600/2000 [13:35<25:28,  1.09s/it]

Training_loss 15.43374


 30%|███       | 601/2000 [13:36<25:19,  1.09s/it]

Training_loss 15.43127


 30%|███       | 602/2000 [13:37<24:30,  1.05s/it]

Training_loss 15.43017


 30%|███       | 603/2000 [13:38<24:42,  1.06s/it]

Training_loss 15.43122


 30%|███       | 604/2000 [13:39<23:51,  1.03s/it]

Training_loss 15.42837


 30%|███       | 605/2000 [13:40<24:08,  1.04s/it]

Training_loss 15.40491


 30%|███       | 606/2000 [13:41<23:55,  1.03s/it]

Training_loss 15.38937


 30%|███       | 607/2000 [13:42<24:03,  1.04s/it]

Training_loss 15.35694


 30%|███       | 608/2000 [13:43<24:27,  1.05s/it]

Training_loss 15.34721


 30%|███       | 609/2000 [13:44<23:47,  1.03s/it]

Training_loss 15.33101


 30%|███       | 610/2000 [13:45<23:16,  1.00s/it]

Training_loss 15.32414


 31%|███       | 611/2000 [13:46<23:51,  1.03s/it]

Training_loss 15.30394


 31%|███       | 612/2000 [13:47<23:32,  1.02s/it]

Training_loss 15.29900


 31%|███       | 613/2000 [13:48<23:08,  1.00s/it]

Training_loss 15.29847


 31%|███       | 614/2000 [13:49<23:10,  1.00s/it]

Training_loss 15.30259


 31%|███       | 615/2000 [13:51<24:14,  1.05s/it]

Training_loss 15.29898


 31%|███       | 616/2000 [13:52<25:39,  1.11s/it]

Training_loss 15.30000


 31%|███       | 617/2000 [13:53<25:13,  1.09s/it]

Training_loss 15.28761


 31%|███       | 618/2000 [13:54<24:31,  1.06s/it]

Training_loss 15.30254


 31%|███       | 619/2000 [13:55<25:11,  1.09s/it]

Training_loss 15.28458


 31%|███       | 620/2000 [13:56<25:03,  1.09s/it]

Training_loss 15.26498


 31%|███       | 621/2000 [13:57<24:26,  1.06s/it]

Training_loss 15.26628


 31%|███       | 622/2000 [13:58<23:36,  1.03s/it]

Training_loss 15.25589


 31%|███       | 623/2000 [13:59<23:30,  1.02s/it]

Training_loss 15.25846


 31%|███       | 624/2000 [14:00<23:02,  1.00s/it]

Training_loss 15.25314


 31%|███▏      | 625/2000 [14:01<22:48,  1.00it/s]

Training_loss 15.25272


 31%|███▏      | 626/2000 [14:02<22:34,  1.01it/s]

Training_loss 15.24106


 31%|███▏      | 627/2000 [14:03<22:13,  1.03it/s]

Training_loss 15.24705


 31%|███▏      | 628/2000 [14:04<22:14,  1.03it/s]

Training_loss 15.23274


 31%|███▏      | 629/2000 [14:05<23:23,  1.02s/it]

Training_loss 15.26102


 32%|███▏      | 630/2000 [14:06<22:41,  1.01it/s]

Training_loss 15.28113


 32%|███▏      | 631/2000 [14:07<23:52,  1.05s/it]

Training_loss 15.28164


 32%|███▏      | 632/2000 [14:08<24:52,  1.09s/it]

Training_loss 15.26983


 32%|███▏      | 633/2000 [14:10<26:23,  1.16s/it]

Training_loss 15.25707


 32%|███▏      | 634/2000 [14:11<25:51,  1.14s/it]

Training_loss 15.23763


 32%|███▏      | 635/2000 [14:12<24:55,  1.10s/it]

Training_loss 15.23430


 32%|███▏      | 636/2000 [14:13<24:11,  1.06s/it]

Training_loss 15.23938


 32%|███▏      | 637/2000 [14:14<24:01,  1.06s/it]

Training_loss 15.23442


 32%|███▏      | 638/2000 [14:15<24:02,  1.06s/it]

Training_loss 15.23089


 32%|███▏      | 639/2000 [14:16<24:02,  1.06s/it]

Training_loss 15.23737


 32%|███▏      | 640/2000 [14:17<24:40,  1.09s/it]

Training_loss 15.21609


 32%|███▏      | 641/2000 [14:18<24:33,  1.08s/it]

Training_loss 15.20592


 32%|███▏      | 642/2000 [14:19<24:47,  1.10s/it]

Training_loss 15.20227


 32%|███▏      | 643/2000 [14:20<25:00,  1.11s/it]

Training_loss 15.20393


 32%|███▏      | 644/2000 [14:21<23:57,  1.06s/it]

Training_loss 15.21168


 32%|███▏      | 645/2000 [14:22<24:32,  1.09s/it]

Training_loss 15.21648


 32%|███▏      | 646/2000 [14:24<25:45,  1.14s/it]

Training_loss 15.22408


 32%|███▏      | 647/2000 [14:25<24:28,  1.09s/it]

Training_loss 15.23420


 32%|███▏      | 648/2000 [14:26<23:39,  1.05s/it]

Training_loss 15.23733


 32%|███▏      | 649/2000 [14:27<23:11,  1.03s/it]

Training_loss 15.24552


 32%|███▎      | 650/2000 [14:28<23:02,  1.02s/it]

Training_loss 15.23389


 33%|███▎      | 651/2000 [14:29<22:38,  1.01s/it]

Training_loss 15.24832


 33%|███▎      | 652/2000 [14:30<23:15,  1.04s/it]

Training_loss 15.26316


 33%|███▎      | 653/2000 [14:31<23:11,  1.03s/it]

Training_loss 15.25874


 33%|███▎      | 654/2000 [14:32<22:53,  1.02s/it]

Training_loss 15.26726


 33%|███▎      | 655/2000 [14:33<22:44,  1.01s/it]

Training_loss 15.25575


 33%|███▎      | 656/2000 [14:34<22:10,  1.01it/s]

Training_loss 15.23632


 33%|███▎      | 657/2000 [14:35<22:46,  1.02s/it]

Training_loss 15.23650


 33%|███▎      | 658/2000 [14:36<22:56,  1.03s/it]

Training_loss 15.24748


 33%|███▎      | 659/2000 [14:37<22:54,  1.02s/it]

Training_loss 15.25432


 33%|███▎      | 660/2000 [14:38<22:22,  1.00s/it]

Training_loss 15.26716


 33%|███▎      | 661/2000 [14:39<23:06,  1.04s/it]

Training_loss 15.25468


 33%|███▎      | 662/2000 [14:40<23:10,  1.04s/it]

Training_loss 15.26473


 33%|███▎      | 663/2000 [14:41<22:32,  1.01s/it]

Training_loss 15.27378


 33%|███▎      | 664/2000 [14:42<22:23,  1.01s/it]

Training_loss 15.25579


 33%|███▎      | 665/2000 [14:43<22:17,  1.00s/it]

Training_loss 15.26032


 33%|███▎      | 666/2000 [14:44<21:30,  1.03it/s]

Training_loss 15.25471


 33%|███▎      | 667/2000 [14:45<22:07,  1.00it/s]

Training_loss 15.23571


 33%|███▎      | 668/2000 [14:46<21:44,  1.02it/s]

Training_loss 15.23662


 33%|███▎      | 669/2000 [14:47<21:46,  1.02it/s]

Training_loss 15.24532


 34%|███▎      | 670/2000 [14:48<23:21,  1.05s/it]

Training_loss 15.22232


 34%|███▎      | 671/2000 [14:49<23:52,  1.08s/it]

Training_loss 15.21831


 34%|███▎      | 672/2000 [14:50<23:44,  1.07s/it]

Training_loss 15.20132


 34%|███▎      | 673/2000 [14:51<23:32,  1.06s/it]

Training_loss 15.19702


 34%|███▎      | 674/2000 [14:52<23:50,  1.08s/it]

Training_loss 15.18760


 34%|███▍      | 675/2000 [14:53<23:08,  1.05s/it]

Training_loss 15.18488


 34%|███▍      | 676/2000 [14:54<23:03,  1.05s/it]

Training_loss 15.18091


 34%|███▍      | 677/2000 [14:56<24:37,  1.12s/it]

Training_loss 15.19394


 34%|███▍      | 678/2000 [14:57<23:45,  1.08s/it]

Training_loss 15.18345


 34%|███▍      | 679/2000 [14:58<24:20,  1.11s/it]

Training_loss 15.17121


 34%|███▍      | 680/2000 [14:59<24:05,  1.10s/it]

Training_loss 15.17641


 34%|███▍      | 681/2000 [15:00<24:06,  1.10s/it]

Training_loss 15.17692


 34%|███▍      | 682/2000 [15:01<23:44,  1.08s/it]

Training_loss 15.19339


 34%|███▍      | 683/2000 [15:02<22:40,  1.03s/it]

Training_loss 15.18442


 34%|███▍      | 684/2000 [15:03<22:23,  1.02s/it]

Training_loss 15.19340


 34%|███▍      | 685/2000 [15:04<21:48,  1.00it/s]

Training_loss 15.20714


 34%|███▍      | 686/2000 [15:05<22:22,  1.02s/it]

Training_loss 15.21638


 34%|███▍      | 687/2000 [15:06<22:06,  1.01s/it]

Training_loss 15.21852


 34%|███▍      | 688/2000 [15:07<22:10,  1.01s/it]

Training_loss 15.22306


 34%|███▍      | 689/2000 [15:08<21:49,  1.00it/s]

Training_loss 15.21306


 34%|███▍      | 690/2000 [15:09<22:23,  1.03s/it]

Training_loss 15.21349


 35%|███▍      | 691/2000 [15:10<22:12,  1.02s/it]

Training_loss 15.22429


 35%|███▍      | 692/2000 [15:11<24:00,  1.10s/it]

Training_loss 15.22615


 35%|███▍      | 693/2000 [15:12<24:17,  1.11s/it]

Training_loss 15.23018


 35%|███▍      | 694/2000 [15:13<23:49,  1.09s/it]

Training_loss 15.23275


 35%|███▍      | 695/2000 [15:14<22:59,  1.06s/it]

Training_loss 15.25066


 35%|███▍      | 696/2000 [15:15<22:16,  1.02s/it]

Training_loss 15.26912


 35%|███▍      | 697/2000 [15:16<23:07,  1.06s/it]

Training_loss 15.27732


 35%|███▍      | 698/2000 [15:18<23:08,  1.07s/it]

Training_loss 15.28015


 35%|███▍      | 699/2000 [15:19<22:26,  1.03s/it]

Training_loss 15.26994


 35%|███▌      | 700/2000 [15:20<22:14,  1.03s/it]

Training_loss 15.25680


 35%|███▌      | 701/2000 [15:21<21:57,  1.01s/it]

Training_loss 15.23285


 35%|███▌      | 702/2000 [15:22<21:52,  1.01s/it]

Training_loss 15.24098


 35%|███▌      | 703/2000 [15:22<21:08,  1.02it/s]

Training_loss 15.24393


 35%|███▌      | 704/2000 [15:23<21:27,  1.01it/s]

Training_loss 15.24359


 35%|███▌      | 705/2000 [15:24<21:27,  1.01it/s]

Training_loss 15.27124


 35%|███▌      | 706/2000 [15:25<21:14,  1.01it/s]

Training_loss 15.26997


 35%|███▌      | 707/2000 [15:27<22:17,  1.03s/it]

Training_loss 15.26537


 35%|███▌      | 708/2000 [15:28<23:03,  1.07s/it]

Training_loss 15.28225


 35%|███▌      | 709/2000 [15:29<23:07,  1.07s/it]

Training_loss 15.28645


 36%|███▌      | 710/2000 [15:30<22:34,  1.05s/it]

Training_loss 15.29597


 36%|███▌      | 711/2000 [15:31<22:17,  1.04s/it]

Training_loss 15.30488


 36%|███▌      | 712/2000 [15:32<22:20,  1.04s/it]

Training_loss 15.31691


 36%|███▌      | 713/2000 [15:33<22:19,  1.04s/it]

Training_loss 15.32420


 36%|███▌      | 714/2000 [15:34<23:12,  1.08s/it]

Training_loss 15.32519


 36%|███▌      | 715/2000 [15:35<22:42,  1.06s/it]

Training_loss 15.33956


 36%|███▌      | 716/2000 [15:36<21:50,  1.02s/it]

Training_loss 15.34045


 36%|███▌      | 717/2000 [15:37<21:34,  1.01s/it]

Training_loss 15.34498


 36%|███▌      | 718/2000 [15:38<22:40,  1.06s/it]

Training_loss 15.37208


 36%|███▌      | 719/2000 [15:39<22:28,  1.05s/it]

Training_loss 15.38888


 36%|███▌      | 720/2000 [15:40<22:53,  1.07s/it]

Training_loss 15.37481


 36%|███▌      | 721/2000 [15:41<22:26,  1.05s/it]

Training_loss 15.36883


 36%|███▌      | 722/2000 [15:43<23:52,  1.12s/it]

Training_loss 15.36475


 36%|███▌      | 723/2000 [15:44<24:09,  1.14s/it]

Training_loss 15.35398


 36%|███▌      | 724/2000 [15:45<23:25,  1.10s/it]

Training_loss 15.35613


 36%|███▋      | 725/2000 [15:46<23:02,  1.08s/it]

Training_loss 15.32773


 36%|███▋      | 726/2000 [15:47<21:44,  1.02s/it]

Training_loss 15.33300


 36%|███▋      | 727/2000 [15:48<21:56,  1.03s/it]

Training_loss 15.33271


 36%|███▋      | 728/2000 [15:49<21:48,  1.03s/it]

Training_loss 15.33147


 36%|███▋      | 729/2000 [15:50<21:45,  1.03s/it]

Training_loss 15.30532


 36%|███▋      | 730/2000 [15:51<21:28,  1.01s/it]

Training_loss 15.32732


 37%|███▋      | 731/2000 [15:52<21:08,  1.00it/s]

Training_loss 15.31971


 37%|███▋      | 732/2000 [15:53<20:59,  1.01it/s]

Training_loss 15.32973


 37%|███▋      | 733/2000 [15:54<20:53,  1.01it/s]

Training_loss 15.31848


 37%|███▋      | 734/2000 [15:55<21:35,  1.02s/it]

Training_loss 15.33660


 37%|███▋      | 735/2000 [15:56<21:28,  1.02s/it]

Training_loss 15.34003


 37%|███▋      | 736/2000 [15:57<21:35,  1.02s/it]

Training_loss 15.35575


 37%|███▋      | 737/2000 [15:58<22:10,  1.05s/it]

Training_loss 15.34056


 37%|███▋      | 738/2000 [15:59<23:37,  1.12s/it]

Training_loss 15.36047


 37%|███▋      | 739/2000 [16:00<23:14,  1.11s/it]

Training_loss 15.37665


 37%|███▋      | 740/2000 [16:01<22:32,  1.07s/it]

Training_loss 15.37982


 37%|███▋      | 741/2000 [16:02<22:31,  1.07s/it]

Training_loss 15.37706


 37%|███▋      | 742/2000 [16:03<22:19,  1.06s/it]

Training_loss 15.38725


 37%|███▋      | 743/2000 [16:04<21:22,  1.02s/it]

Training_loss 15.38704


 37%|███▋      | 744/2000 [16:05<21:13,  1.01s/it]

Training_loss 15.40879


 37%|███▋      | 745/2000 [16:06<21:42,  1.04s/it]

Training_loss 15.40564


 37%|███▋      | 746/2000 [16:07<21:21,  1.02s/it]

Training_loss 15.41433


 37%|███▋      | 747/2000 [16:09<22:57,  1.10s/it]

Training_loss 15.39995


 37%|███▋      | 748/2000 [16:10<23:35,  1.13s/it]

Training_loss 15.38807


 37%|███▋      | 749/2000 [16:11<23:34,  1.13s/it]

Training_loss 15.39043


 38%|███▊      | 750/2000 [16:12<23:18,  1.12s/it]

Training_loss 15.39870


 38%|███▊      | 751/2000 [16:13<22:59,  1.10s/it]

Training_loss 15.38807


 38%|███▊      | 752/2000 [16:14<23:24,  1.13s/it]

Training_loss 15.39034


 38%|███▊      | 753/2000 [16:16<24:00,  1.16s/it]

Training_loss 15.35330


 38%|███▊      | 754/2000 [16:17<23:37,  1.14s/it]

Training_loss 15.34915


 38%|███▊      | 755/2000 [16:18<23:06,  1.11s/it]

Training_loss 15.38089


 38%|███▊      | 756/2000 [16:19<22:12,  1.07s/it]

Training_loss 15.39126


 38%|███▊      | 757/2000 [16:20<21:39,  1.05s/it]

Training_loss 15.38241


 38%|███▊      | 758/2000 [16:21<21:08,  1.02s/it]

Training_loss 15.38563


 38%|███▊      | 759/2000 [16:22<21:06,  1.02s/it]

Training_loss 15.39676


 38%|███▊      | 760/2000 [16:23<21:05,  1.02s/it]

Training_loss 15.39099


 38%|███▊      | 761/2000 [16:24<20:55,  1.01s/it]

Training_loss 15.37462


 38%|███▊      | 762/2000 [16:25<20:49,  1.01s/it]

Training_loss 15.39600


 38%|███▊      | 763/2000 [16:26<20:22,  1.01it/s]

Training_loss 15.41719


 38%|███▊      | 764/2000 [16:27<20:17,  1.02it/s]

Training_loss 15.40999


 38%|███▊      | 765/2000 [16:28<20:27,  1.01it/s]

Training_loss 15.40994


 38%|███▊      | 766/2000 [16:29<20:32,  1.00it/s]

Training_loss 15.40320


 38%|███▊      | 767/2000 [16:30<20:34,  1.00s/it]

Training_loss 15.42128


 38%|███▊      | 768/2000 [16:31<21:44,  1.06s/it]

Training_loss 15.43531


 38%|███▊      | 769/2000 [16:32<22:13,  1.08s/it]

Training_loss 15.40259


 38%|███▊      | 770/2000 [16:33<22:34,  1.10s/it]

Training_loss 15.40413


 39%|███▊      | 771/2000 [16:34<22:01,  1.08s/it]

Training_loss 15.43422


 39%|███▊      | 772/2000 [16:35<21:52,  1.07s/it]

Training_loss 15.45257


 39%|███▊      | 773/2000 [16:36<20:49,  1.02s/it]

Training_loss 15.44188


 39%|███▊      | 774/2000 [16:37<20:38,  1.01s/it]

Training_loss 15.43547


 39%|███▉      | 775/2000 [16:38<20:34,  1.01s/it]

Training_loss 15.43533


 39%|███▉      | 776/2000 [16:39<20:59,  1.03s/it]

Training_loss 15.42418


 39%|███▉      | 777/2000 [16:40<21:14,  1.04s/it]

Training_loss 15.43617


 39%|███▉      | 778/2000 [16:41<21:08,  1.04s/it]

Training_loss 15.44346


 39%|███▉      | 779/2000 [16:42<20:57,  1.03s/it]

Training_loss 15.44449


 39%|███▉      | 780/2000 [16:43<20:53,  1.03s/it]

Training_loss 15.43899


 39%|███▉      | 781/2000 [16:44<21:26,  1.05s/it]

Training_loss 15.42388


 39%|███▉      | 782/2000 [16:46<21:23,  1.05s/it]

Training_loss 15.42321


 39%|███▉      | 783/2000 [16:47<21:13,  1.05s/it]

Training_loss 15.43775


 39%|███▉      | 784/2000 [16:48<21:38,  1.07s/it]

Training_loss 15.44187


 39%|███▉      | 785/2000 [16:49<22:54,  1.13s/it]

Training_loss 15.42545


 39%|███▉      | 786/2000 [16:50<22:50,  1.13s/it]

Training_loss 15.43043


 39%|███▉      | 787/2000 [16:51<22:21,  1.11s/it]

Training_loss 15.43851


 39%|███▉      | 788/2000 [16:52<21:43,  1.08s/it]

Training_loss 15.43327


 39%|███▉      | 789/2000 [16:53<21:17,  1.06s/it]

Training_loss 15.42150


 40%|███▉      | 790/2000 [16:54<20:56,  1.04s/it]

Training_loss 15.41960


 40%|███▉      | 791/2000 [16:55<20:28,  1.02s/it]

Training_loss 15.44707


 40%|███▉      | 792/2000 [16:56<20:02,  1.00it/s]

Training_loss 15.44766


 40%|███▉      | 793/2000 [16:57<19:30,  1.03it/s]

Training_loss 15.43782


 40%|███▉      | 794/2000 [16:58<19:20,  1.04it/s]

Training_loss 15.44037


 40%|███▉      | 795/2000 [16:59<19:35,  1.03it/s]

Training_loss 15.44931


 40%|███▉      | 796/2000 [17:00<19:36,  1.02it/s]

Training_loss 15.46871


 40%|███▉      | 797/2000 [17:01<20:31,  1.02s/it]

Training_loss 15.48046


 40%|███▉      | 798/2000 [17:02<20:43,  1.03s/it]

Training_loss 15.48259


 40%|███▉      | 799/2000 [17:03<22:33,  1.13s/it]

Training_loss 15.47986


 40%|████      | 800/2000 [17:04<21:53,  1.09s/it]

Training_loss 15.48537


 40%|████      | 801/2000 [17:05<21:11,  1.06s/it]

Training_loss 15.48425


 40%|████      | 802/2000 [17:06<20:52,  1.05s/it]

Training_loss 15.46420


 40%|████      | 803/2000 [17:07<20:54,  1.05s/it]

Training_loss 15.44679


 40%|████      | 804/2000 [17:09<21:20,  1.07s/it]

Training_loss 15.44406


 40%|████      | 805/2000 [17:10<20:46,  1.04s/it]

Training_loss 15.45082


 40%|████      | 806/2000 [17:11<21:14,  1.07s/it]

Training_loss 15.44300


 40%|████      | 807/2000 [17:12<20:48,  1.05s/it]

Training_loss 15.43521


 40%|████      | 808/2000 [17:13<20:28,  1.03s/it]

Training_loss 15.45820


 40%|████      | 809/2000 [17:14<20:51,  1.05s/it]

Training_loss 15.47261


 40%|████      | 810/2000 [17:15<20:16,  1.02s/it]

Training_loss 15.47547


 41%|████      | 811/2000 [17:16<20:19,  1.03s/it]

Training_loss 15.46885


 41%|████      | 812/2000 [17:17<20:09,  1.02s/it]

Training_loss 15.48629


 41%|████      | 813/2000 [17:18<20:32,  1.04s/it]

Training_loss 15.48281


 41%|████      | 814/2000 [17:19<21:27,  1.09s/it]

Training_loss 15.47584


 41%|████      | 815/2000 [17:20<21:48,  1.10s/it]

Training_loss 15.47491


 41%|████      | 816/2000 [17:21<21:23,  1.08s/it]

Training_loss 15.48388


 41%|████      | 817/2000 [17:22<21:06,  1.07s/it]

Training_loss 15.49561


 41%|████      | 818/2000 [17:23<20:33,  1.04s/it]

Training_loss 15.49970


 41%|████      | 819/2000 [17:24<19:41,  1.00s/it]

Training_loss 15.50465


 41%|████      | 820/2000 [17:25<19:24,  1.01it/s]

Training_loss 15.51337


 41%|████      | 821/2000 [17:26<19:10,  1.02it/s]

Training_loss 15.53895


 41%|████      | 822/2000 [17:27<19:16,  1.02it/s]

Training_loss 15.51934


 41%|████      | 823/2000 [17:28<19:30,  1.01it/s]

Training_loss 15.51648


 41%|████      | 824/2000 [17:29<18:53,  1.04it/s]

Training_loss 15.52410


 41%|████▏     | 825/2000 [17:30<19:08,  1.02it/s]

Training_loss 15.52414


 41%|████▏     | 826/2000 [17:31<18:59,  1.03it/s]

Training_loss 15.52033


 41%|████▏     | 827/2000 [17:32<18:58,  1.03it/s]

Training_loss 15.50549


 41%|████▏     | 828/2000 [17:33<18:50,  1.04it/s]

Training_loss 15.49758


 41%|████▏     | 829/2000 [17:34<19:14,  1.01it/s]

Training_loss 15.51126


 42%|████▏     | 830/2000 [17:35<20:41,  1.06s/it]

Training_loss 15.50323


 42%|████▏     | 831/2000 [17:36<20:34,  1.06s/it]

Training_loss 15.51116


 42%|████▏     | 832/2000 [17:37<20:01,  1.03s/it]

Training_loss 15.51209


 42%|████▏     | 833/2000 [17:38<19:26,  1.00it/s]

Training_loss 15.50996


 42%|████▏     | 834/2000 [17:39<19:38,  1.01s/it]

Training_loss 15.50395


 42%|████▏     | 835/2000 [17:40<19:14,  1.01it/s]

Training_loss 15.50958


 42%|████▏     | 836/2000 [17:41<19:22,  1.00it/s]

Training_loss 15.52163


 42%|████▏     | 837/2000 [17:42<18:58,  1.02it/s]

Training_loss 15.54010


 42%|████▏     | 838/2000 [17:43<18:59,  1.02it/s]

Training_loss 15.54088


 42%|████▏     | 839/2000 [17:44<19:14,  1.01it/s]

Training_loss 15.54071


 42%|████▏     | 840/2000 [17:45<18:49,  1.03it/s]

Training_loss 15.52562


 42%|████▏     | 841/2000 [17:46<19:29,  1.01s/it]

Training_loss 15.54668


 42%|████▏     | 842/2000 [17:47<19:11,  1.01it/s]

Training_loss 15.55411


 42%|████▏     | 843/2000 [17:48<20:06,  1.04s/it]

Training_loss 15.54061


 42%|████▏     | 844/2000 [17:49<19:56,  1.04s/it]

Training_loss 15.55162


 42%|████▏     | 845/2000 [17:50<20:00,  1.04s/it]

Training_loss 15.57100


 42%|████▏     | 846/2000 [17:51<20:56,  1.09s/it]

Training_loss 15.55495


 42%|████▏     | 847/2000 [17:52<20:35,  1.07s/it]

Training_loss 15.57419


 42%|████▏     | 848/2000 [17:54<20:42,  1.08s/it]

Training_loss 15.58063


 42%|████▏     | 849/2000 [17:55<20:11,  1.05s/it]

Training_loss 15.59278


 42%|████▎     | 850/2000 [17:56<20:12,  1.05s/it]

Training_loss 15.59431


 43%|████▎     | 851/2000 [17:57<19:42,  1.03s/it]

Training_loss 15.60302


 43%|████▎     | 852/2000 [17:58<21:15,  1.11s/it]

Training_loss 15.62581


 43%|████▎     | 853/2000 [17:59<20:42,  1.08s/it]

Training_loss 15.62305


 43%|████▎     | 854/2000 [18:00<20:05,  1.05s/it]

Training_loss 15.63186


 43%|████▎     | 855/2000 [18:01<19:26,  1.02s/it]

Training_loss 15.63746


 43%|████▎     | 856/2000 [18:02<19:12,  1.01s/it]

Training_loss 15.65284


 43%|████▎     | 857/2000 [18:03<19:09,  1.01s/it]

Training_loss 15.63791


 43%|████▎     | 858/2000 [18:04<18:50,  1.01it/s]

Training_loss 15.64098


 43%|████▎     | 859/2000 [18:05<18:47,  1.01it/s]

Training_loss 15.65341


 43%|████▎     | 860/2000 [18:06<18:53,  1.01it/s]

Training_loss 15.66892


 43%|████▎     | 861/2000 [18:07<20:30,  1.08s/it]

Training_loss 15.66678


 43%|████▎     | 862/2000 [18:08<21:19,  1.12s/it]

Training_loss 15.66936


 43%|████▎     | 863/2000 [18:10<22:22,  1.18s/it]

Training_loss 15.68421


 43%|████▎     | 864/2000 [18:11<22:54,  1.21s/it]

Training_loss 15.65901


 43%|████▎     | 865/2000 [18:12<22:14,  1.18s/it]

Training_loss 15.67571


 43%|████▎     | 866/2000 [18:13<20:52,  1.10s/it]

Training_loss 15.67748


 43%|████▎     | 867/2000 [18:14<20:23,  1.08s/it]

Training_loss 15.69718


 43%|████▎     | 868/2000 [18:15<20:13,  1.07s/it]

Training_loss 15.71482


 43%|████▎     | 869/2000 [18:16<19:48,  1.05s/it]

Training_loss 15.70668


 44%|████▎     | 870/2000 [18:17<19:37,  1.04s/it]

Training_loss 15.69461


 44%|████▎     | 871/2000 [18:18<19:16,  1.02s/it]

Training_loss 15.70196


 44%|████▎     | 872/2000 [18:19<19:12,  1.02s/it]

Training_loss 15.70572


 44%|████▎     | 873/2000 [18:20<19:15,  1.03s/it]

Training_loss 15.71791


 44%|████▎     | 874/2000 [18:21<19:19,  1.03s/it]

Training_loss 15.70470


 44%|████▍     | 875/2000 [18:22<20:02,  1.07s/it]

Training_loss 15.70224


 44%|████▍     | 876/2000 [18:24<23:01,  1.23s/it]

Training_loss 15.70972


 44%|████▍     | 877/2000 [18:25<22:04,  1.18s/it]

Training_loss 15.69692


 44%|████▍     | 878/2000 [18:26<21:27,  1.15s/it]

Training_loss 15.69824


 44%|████▍     | 879/2000 [18:27<20:23,  1.09s/it]

Training_loss 15.68177


 44%|████▍     | 880/2000 [18:28<19:59,  1.07s/it]

Training_loss 15.68276


 44%|████▍     | 881/2000 [18:29<20:21,  1.09s/it]

Training_loss 15.68035


 44%|████▍     | 882/2000 [18:30<19:43,  1.06s/it]

Training_loss 15.66427


 44%|████▍     | 883/2000 [18:31<19:30,  1.05s/it]

Training_loss 15.66353


 44%|████▍     | 884/2000 [18:32<19:14,  1.03s/it]

Training_loss 15.65703


 44%|████▍     | 885/2000 [18:33<19:15,  1.04s/it]

Training_loss 15.65543


 44%|████▍     | 886/2000 [18:34<20:25,  1.10s/it]

Training_loss 15.67019


 44%|████▍     | 887/2000 [18:36<20:54,  1.13s/it]

Training_loss 15.67977


 44%|████▍     | 888/2000 [18:37<20:42,  1.12s/it]

Training_loss 15.67783


 44%|████▍     | 889/2000 [18:38<21:08,  1.14s/it]

Training_loss 15.69602


 44%|████▍     | 890/2000 [18:39<23:23,  1.26s/it]

Training_loss 15.69783


 45%|████▍     | 891/2000 [18:41<24:29,  1.33s/it]

Training_loss 15.70227


 45%|████▍     | 892/2000 [18:42<24:04,  1.30s/it]

Training_loss 15.67987


 45%|████▍     | 893/2000 [18:43<22:47,  1.24s/it]

Training_loss 15.67468


 45%|████▍     | 894/2000 [18:44<21:03,  1.14s/it]

Training_loss 15.68853


 45%|████▍     | 895/2000 [18:45<20:16,  1.10s/it]

Training_loss 15.69264


 45%|████▍     | 896/2000 [18:46<19:36,  1.07s/it]

Training_loss 15.67442


 45%|████▍     | 897/2000 [18:47<20:07,  1.09s/it]

Training_loss 15.66913


 45%|████▍     | 898/2000 [18:48<20:00,  1.09s/it]

Training_loss 15.69266


 45%|████▍     | 899/2000 [18:50<20:24,  1.11s/it]

Training_loss 15.70934


 45%|████▌     | 900/2000 [18:50<19:25,  1.06s/it]

Training_loss 15.72008


 45%|████▌     | 901/2000 [18:51<18:50,  1.03s/it]

Training_loss 15.73495


 45%|████▌     | 902/2000 [18:52<19:01,  1.04s/it]

Training_loss 15.74677


 45%|████▌     | 903/2000 [18:54<19:06,  1.04s/it]

Training_loss 15.73747


 45%|████▌     | 904/2000 [18:55<19:29,  1.07s/it]

Training_loss 15.73019


 45%|████▌     | 905/2000 [18:56<20:00,  1.10s/it]

Training_loss 15.75303


 45%|████▌     | 906/2000 [18:57<19:27,  1.07s/it]

Training_loss 15.73191


 45%|████▌     | 907/2000 [18:58<18:49,  1.03s/it]

Training_loss 15.72409


 45%|████▌     | 908/2000 [18:59<18:37,  1.02s/it]

Training_loss 15.74291


 45%|████▌     | 909/2000 [19:00<18:10,  1.00it/s]

Training_loss 15.74636


 46%|████▌     | 910/2000 [19:01<17:51,  1.02it/s]

Training_loss 15.77146


 46%|████▌     | 911/2000 [19:02<17:53,  1.01it/s]

Training_loss 15.76878


 46%|████▌     | 912/2000 [19:03<18:23,  1.01s/it]

Training_loss 15.76523


 46%|████▌     | 913/2000 [19:04<18:31,  1.02s/it]

Training_loss 15.77370


 46%|████▌     | 914/2000 [19:05<19:13,  1.06s/it]

Training_loss 15.75530


 46%|████▌     | 915/2000 [19:06<19:55,  1.10s/it]

Training_loss 15.76660


 46%|████▌     | 916/2000 [19:08<25:18,  1.40s/it]

Training_loss 15.75500


 46%|████▌     | 917/2000 [19:10<26:01,  1.44s/it]

Training_loss 15.76734


 46%|████▌     | 918/2000 [19:11<26:21,  1.46s/it]

Training_loss 15.76894


 46%|████▌     | 919/2000 [19:13<25:32,  1.42s/it]

Training_loss 15.78495


 46%|████▌     | 920/2000 [19:14<25:52,  1.44s/it]

Training_loss 15.76512


 46%|████▌     | 921/2000 [19:16<27:23,  1.52s/it]

Training_loss 15.75478


 46%|████▌     | 922/2000 [19:17<25:23,  1.41s/it]

Training_loss 15.74697


 46%|████▌     | 923/2000 [19:18<23:54,  1.33s/it]

Training_loss 15.74517


 46%|████▌     | 924/2000 [19:20<24:21,  1.36s/it]

Training_loss 15.75106


 46%|████▋     | 925/2000 [19:21<24:44,  1.38s/it]

Training_loss 15.74142


 46%|████▋     | 926/2000 [19:22<24:18,  1.36s/it]

Training_loss 15.75695


 46%|████▋     | 927/2000 [19:23<23:40,  1.32s/it]

Training_loss 15.75944


 46%|████▋     | 928/2000 [19:25<24:36,  1.38s/it]

Training_loss 15.78658


 46%|████▋     | 929/2000 [19:28<30:57,  1.73s/it]

Training_loss 15.81663


 46%|████▋     | 930/2000 [19:29<31:24,  1.76s/it]

Training_loss 15.83360


 47%|████▋     | 931/2000 [19:31<31:10,  1.75s/it]

Training_loss 15.84671


 47%|████▋     | 932/2000 [19:32<28:05,  1.58s/it]

Training_loss 15.86091


 47%|████▋     | 933/2000 [19:33<25:54,  1.46s/it]

Training_loss 15.86810


 47%|████▋     | 934/2000 [19:35<28:28,  1.60s/it]

Training_loss 15.85925


 47%|████▋     | 935/2000 [19:37<27:56,  1.57s/it]

Training_loss 15.83877


 47%|████▋     | 936/2000 [19:38<26:19,  1.48s/it]

Training_loss 15.86060


 47%|████▋     | 937/2000 [19:39<24:29,  1.38s/it]

Training_loss 15.89470


 47%|████▋     | 938/2000 [19:40<23:06,  1.31s/it]

Training_loss 15.89279


 47%|████▋     | 939/2000 [19:41<21:07,  1.19s/it]

Training_loss 15.90011


 47%|████▋     | 940/2000 [19:43<21:18,  1.21s/it]

Training_loss 15.91100


 47%|████▋     | 941/2000 [19:44<21:37,  1.23s/it]

Training_loss 15.94126


 47%|████▋     | 942/2000 [19:45<20:18,  1.15s/it]

Training_loss 15.90549


 47%|████▋     | 943/2000 [19:46<20:11,  1.15s/it]

Training_loss 15.90508


 47%|████▋     | 944/2000 [19:47<20:45,  1.18s/it]

Training_loss 15.91634


 47%|████▋     | 945/2000 [19:48<19:34,  1.11s/it]

Training_loss 15.93222


 47%|████▋     | 946/2000 [19:49<19:04,  1.09s/it]

Training_loss 15.93138


 47%|████▋     | 947/2000 [19:50<18:34,  1.06s/it]

Training_loss 15.94397


 47%|████▋     | 948/2000 [19:51<19:08,  1.09s/it]

Training_loss 15.94376


 47%|████▋     | 949/2000 [19:53<19:39,  1.12s/it]

Training_loss 15.94630


 48%|████▊     | 950/2000 [19:54<20:20,  1.16s/it]

Training_loss 15.93524


 48%|████▊     | 951/2000 [19:55<20:05,  1.15s/it]

Training_loss 15.91312


 48%|████▊     | 952/2000 [19:56<19:38,  1.12s/it]

Training_loss 15.90401


 48%|████▊     | 953/2000 [19:57<19:57,  1.14s/it]

Training_loss 15.91785


 48%|████▊     | 954/2000 [19:59<20:42,  1.19s/it]

Training_loss 15.90492


 48%|████▊     | 955/2000 [20:01<24:48,  1.42s/it]

Training_loss 15.91367


 48%|████▊     | 956/2000 [20:02<23:13,  1.33s/it]

Training_loss 15.91251


 48%|████▊     | 957/2000 [20:03<22:13,  1.28s/it]

Training_loss 15.90827


 48%|████▊     | 958/2000 [20:04<22:19,  1.29s/it]

Training_loss 15.89878


 48%|████▊     | 959/2000 [20:05<21:26,  1.24s/it]

Training_loss 15.87727


 48%|████▊     | 960/2000 [20:06<21:36,  1.25s/it]

Training_loss 15.86722


 48%|████▊     | 961/2000 [20:08<22:09,  1.28s/it]

Training_loss 15.86855


 48%|████▊     | 962/2000 [20:09<22:53,  1.32s/it]

Training_loss 15.87334


 48%|████▊     | 963/2000 [20:11<24:23,  1.41s/it]

Training_loss 15.88786


 48%|████▊     | 964/2000 [20:12<22:47,  1.32s/it]

Training_loss 15.90582


 48%|████▊     | 965/2000 [20:13<21:27,  1.24s/it]

Training_loss 15.90473


 48%|████▊     | 966/2000 [20:14<20:28,  1.19s/it]

Training_loss 15.89689


 48%|████▊     | 967/2000 [20:15<21:21,  1.24s/it]

Training_loss 15.88774


 48%|████▊     | 968/2000 [20:17<22:17,  1.30s/it]

Training_loss 15.88870


 48%|████▊     | 969/2000 [20:18<21:25,  1.25s/it]

Training_loss 15.86990


 48%|████▊     | 970/2000 [20:19<21:06,  1.23s/it]

Training_loss 15.85969


 49%|████▊     | 971/2000 [20:22<26:50,  1.57s/it]

Training_loss 15.86953


 49%|████▊     | 972/2000 [20:23<25:28,  1.49s/it]

Training_loss 15.86306


 49%|████▊     | 973/2000 [20:24<24:09,  1.41s/it]

Training_loss 15.86099


 49%|████▊     | 974/2000 [20:25<23:48,  1.39s/it]

Training_loss 15.86043


 49%|████▉     | 975/2000 [20:27<23:36,  1.38s/it]

Training_loss 15.84332


 49%|████▉     | 976/2000 [20:29<25:17,  1.48s/it]

Training_loss 15.84972


 49%|████▉     | 977/2000 [20:32<36:43,  2.15s/it]

Training_loss 15.84636


 49%|████▉     | 978/2000 [20:35<42:14,  2.48s/it]

Training_loss 15.84530


 49%|████▉     | 979/2000 [20:38<43:08,  2.54s/it]

Training_loss 15.84540


 49%|████▉     | 980/2000 [20:41<42:51,  2.52s/it]

Training_loss 15.82332


 49%|████▉     | 981/2000 [20:43<42:51,  2.52s/it]

Training_loss 15.81811


 49%|████▉     | 982/2000 [20:45<39:33,  2.33s/it]

Training_loss 15.82959


 49%|████▉     | 983/2000 [20:47<37:50,  2.23s/it]

Training_loss 15.81078


 49%|████▉     | 984/2000 [20:49<37:05,  2.19s/it]

Training_loss 15.82708


 49%|████▉     | 985/2000 [20:51<36:26,  2.15s/it]

Training_loss 15.84161


 49%|████▉     | 986/2000 [20:53<35:52,  2.12s/it]

Training_loss 15.84117


 49%|████▉     | 987/2000 [20:55<33:12,  1.97s/it]

Training_loss 15.82262


 49%|████▉     | 988/2000 [20:56<29:02,  1.72s/it]

Training_loss 15.82740


 49%|████▉     | 989/2000 [20:57<25:06,  1.49s/it]

Training_loss 15.81097


 50%|████▉     | 990/2000 [20:58<22:17,  1.32s/it]

Training_loss 15.79358


 50%|████▉     | 991/2000 [20:59<20:05,  1.20s/it]

Training_loss 15.76251


 50%|████▉     | 992/2000 [21:00<18:33,  1.10s/it]

Training_loss 15.74385


 50%|████▉     | 993/2000 [21:01<17:54,  1.07s/it]

Training_loss 15.75311


 50%|████▉     | 994/2000 [21:02<17:12,  1.03s/it]

Training_loss 15.75693


 50%|████▉     | 995/2000 [21:03<17:47,  1.06s/it]

Training_loss 15.75025


 50%|████▉     | 996/2000 [21:04<18:00,  1.08s/it]

Training_loss 15.74451


 50%|████▉     | 997/2000 [21:05<17:19,  1.04s/it]

Training_loss 15.74453


 50%|████▉     | 998/2000 [21:06<16:35,  1.01it/s]

Training_loss 15.74755


 50%|████▉     | 999/2000 [21:07<16:21,  1.02it/s]

Training_loss 15.74274


 50%|█████     | 1000/2000 [21:08<16:06,  1.03it/s]

Training_loss 15.74489


 50%|█████     | 1001/2000 [21:09<17:53,  1.07s/it]

Training_loss 15.75828


 50%|█████     | 1002/2000 [21:10<19:50,  1.19s/it]

Training_loss 15.78968


 50%|█████     | 1003/2000 [21:11<19:18,  1.16s/it]

Training_loss 15.79169


 50%|█████     | 1004/2000 [21:12<18:35,  1.12s/it]

Training_loss 15.78271


 50%|█████     | 1005/2000 [21:14<19:02,  1.15s/it]

Training_loss 15.75335


 50%|█████     | 1006/2000 [21:15<18:43,  1.13s/it]

Training_loss 15.75418


 50%|█████     | 1007/2000 [21:16<17:57,  1.08s/it]

Training_loss 15.76367


 50%|█████     | 1008/2000 [21:17<17:27,  1.06s/it]

Training_loss 15.76600


 50%|█████     | 1009/2000 [21:18<16:42,  1.01s/it]

Training_loss 15.75910


 50%|█████     | 1010/2000 [21:19<17:37,  1.07s/it]

Training_loss 15.77074


 51%|█████     | 1011/2000 [21:20<17:52,  1.08s/it]

Training_loss 15.77386


 51%|█████     | 1012/2000 [21:21<17:06,  1.04s/it]

Training_loss 15.76727


 51%|█████     | 1013/2000 [21:22<16:40,  1.01s/it]

Training_loss 15.73044


 51%|█████     | 1014/2000 [21:23<16:24,  1.00it/s]

Training_loss 15.70729


 51%|█████     | 1015/2000 [21:24<16:16,  1.01it/s]

Training_loss 15.71150


 51%|█████     | 1016/2000 [21:25<15:56,  1.03it/s]

Training_loss 15.72091


 51%|█████     | 1017/2000 [21:26<15:35,  1.05it/s]

Training_loss 15.72281


 51%|█████     | 1018/2000 [21:27<15:08,  1.08it/s]

Training_loss 15.74135


 51%|█████     | 1019/2000 [21:27<15:06,  1.08it/s]

Training_loss 15.75329


 51%|█████     | 1020/2000 [21:28<15:10,  1.08it/s]

Training_loss 15.76083


 51%|█████     | 1021/2000 [21:29<14:54,  1.09it/s]

Training_loss 15.79437


 51%|█████     | 1022/2000 [21:30<14:50,  1.10it/s]

Training_loss 15.78459


 51%|█████     | 1023/2000 [21:31<14:34,  1.12it/s]

Training_loss 15.78653


 51%|█████     | 1024/2000 [21:32<14:36,  1.11it/s]

Training_loss 15.78633


 51%|█████▏    | 1025/2000 [21:33<14:28,  1.12it/s]

Training_loss 15.79094


 51%|█████▏    | 1026/2000 [21:34<14:54,  1.09it/s]

Training_loss 15.78043


 51%|█████▏    | 1027/2000 [21:35<16:05,  1.01it/s]

Training_loss 15.78356


 51%|█████▏    | 1028/2000 [21:36<16:34,  1.02s/it]

Training_loss 15.77154


 51%|█████▏    | 1029/2000 [21:37<15:54,  1.02it/s]

Training_loss 15.78149


 52%|█████▏    | 1030/2000 [21:38<15:58,  1.01it/s]

Training_loss 15.79136


 52%|█████▏    | 1031/2000 [21:39<16:18,  1.01s/it]

Training_loss 15.79191


 52%|█████▏    | 1032/2000 [21:40<16:01,  1.01it/s]

Training_loss 15.78606


 52%|█████▏    | 1033/2000 [21:41<15:51,  1.02it/s]

Training_loss 15.80278


 52%|█████▏    | 1034/2000 [21:42<16:52,  1.05s/it]

Training_loss 15.81613


 52%|█████▏    | 1035/2000 [21:43<17:13,  1.07s/it]

Training_loss 15.82745


 52%|█████▏    | 1036/2000 [21:44<17:01,  1.06s/it]

Training_loss 15.82883


 52%|█████▏    | 1037/2000 [21:45<17:18,  1.08s/it]

Training_loss 15.83170


 52%|█████▏    | 1038/2000 [21:46<17:31,  1.09s/it]

Training_loss 15.82623


 52%|█████▏    | 1039/2000 [21:48<17:31,  1.09s/it]

Training_loss 15.79907


 52%|█████▏    | 1040/2000 [21:49<17:33,  1.10s/it]

Training_loss 15.81113


 52%|█████▏    | 1041/2000 [21:50<18:19,  1.15s/it]

Training_loss 15.80492


 52%|█████▏    | 1042/2000 [21:52<21:29,  1.35s/it]

Training_loss 15.81077


 52%|█████▏    | 1043/2000 [21:53<22:07,  1.39s/it]

Training_loss 15.79503


 52%|█████▏    | 1044/2000 [21:55<21:31,  1.35s/it]

Training_loss 15.79298


 52%|█████▏    | 1045/2000 [21:56<21:06,  1.33s/it]

Training_loss 15.80784


 52%|█████▏    | 1046/2000 [21:57<20:07,  1.27s/it]

Training_loss 15.82749


 52%|█████▏    | 1047/2000 [21:58<18:44,  1.18s/it]

Training_loss 15.81364


 52%|█████▏    | 1048/2000 [21:59<17:31,  1.10s/it]

Training_loss 15.81682


 52%|█████▏    | 1049/2000 [22:00<16:24,  1.04s/it]

Training_loss 15.82119


 52%|█████▎    | 1050/2000 [22:01<16:08,  1.02s/it]

Training_loss 15.84744


 53%|█████▎    | 1051/2000 [22:02<15:58,  1.01s/it]

Training_loss 15.85672


 53%|█████▎    | 1052/2000 [22:03<15:36,  1.01it/s]

Training_loss 15.85418


 53%|█████▎    | 1053/2000 [22:04<15:09,  1.04it/s]

Training_loss 15.85270


 53%|█████▎    | 1054/2000 [22:04<15:17,  1.03it/s]

Training_loss 15.86867


 53%|█████▎    | 1055/2000 [22:05<15:03,  1.05it/s]

Training_loss 15.87076


 53%|█████▎    | 1056/2000 [22:07<15:53,  1.01s/it]

Training_loss 15.84630


 53%|█████▎    | 1057/2000 [22:08<16:35,  1.06s/it]

Training_loss 15.83965


 53%|█████▎    | 1058/2000 [22:09<16:33,  1.06s/it]

Training_loss 15.83810


 53%|█████▎    | 1059/2000 [22:10<16:54,  1.08s/it]

Training_loss 15.85868


 53%|█████▎    | 1060/2000 [22:11<16:13,  1.04s/it]

Training_loss 15.87984


 53%|█████▎    | 1061/2000 [22:12<15:30,  1.01it/s]

Training_loss 15.91766


 53%|█████▎    | 1062/2000 [22:13<15:19,  1.02it/s]

Training_loss 15.92600


 53%|█████▎    | 1063/2000 [22:14<15:02,  1.04it/s]

Training_loss 15.91884


 53%|█████▎    | 1064/2000 [22:15<14:57,  1.04it/s]

Training_loss 15.92385


 53%|█████▎    | 1065/2000 [22:15<14:47,  1.05it/s]

Training_loss 15.92979


 53%|█████▎    | 1066/2000 [22:16<15:03,  1.03it/s]

Training_loss 15.94290


 53%|█████▎    | 1067/2000 [22:18<15:16,  1.02it/s]

Training_loss 15.94941


 53%|█████▎    | 1068/2000 [22:18<15:15,  1.02it/s]

Training_loss 15.97872


 53%|█████▎    | 1069/2000 [22:19<15:16,  1.02it/s]

Training_loss 15.98746


 54%|█████▎    | 1070/2000 [22:20<15:22,  1.01it/s]

Training_loss 15.99819


 54%|█████▎    | 1071/2000 [22:21<15:11,  1.02it/s]

Training_loss 16.01483


 54%|█████▎    | 1072/2000 [22:23<16:07,  1.04s/it]

Training_loss 16.02209


 54%|█████▎    | 1073/2000 [22:24<16:38,  1.08s/it]

Training_loss 16.01093


 54%|█████▎    | 1074/2000 [22:25<16:02,  1.04s/it]

Training_loss 16.02720


 54%|█████▍    | 1075/2000 [22:26<15:31,  1.01s/it]

Training_loss 16.02444


 54%|█████▍    | 1076/2000 [22:27<15:07,  1.02it/s]

Training_loss 16.04417


 54%|█████▍    | 1077/2000 [22:28<15:10,  1.01it/s]

Training_loss 16.04300


 54%|█████▍    | 1078/2000 [22:29<14:57,  1.03it/s]

Training_loss 16.03887


 54%|█████▍    | 1079/2000 [22:29<14:49,  1.03it/s]

Training_loss 16.05676


 54%|█████▍    | 1080/2000 [22:30<14:36,  1.05it/s]

Training_loss 16.04745


 54%|█████▍    | 1081/2000 [22:31<15:09,  1.01it/s]

Training_loss 16.02831


 54%|█████▍    | 1082/2000 [22:32<14:54,  1.03it/s]

Training_loss 16.00804


 54%|█████▍    | 1083/2000 [22:33<14:37,  1.05it/s]

Training_loss 15.99414


 54%|█████▍    | 1084/2000 [22:34<14:19,  1.07it/s]

Training_loss 15.99940


 54%|█████▍    | 1085/2000 [22:35<14:40,  1.04it/s]

Training_loss 16.00585


 54%|█████▍    | 1086/2000 [22:36<14:23,  1.06it/s]

Training_loss 15.99347


 54%|█████▍    | 1087/2000 [22:37<14:28,  1.05it/s]

Training_loss 16.01224


 54%|█████▍    | 1088/2000 [22:38<14:33,  1.04it/s]

Training_loss 16.01001


 54%|█████▍    | 1089/2000 [22:39<15:46,  1.04s/it]

Training_loss 16.02682


 55%|█████▍    | 1090/2000 [22:40<15:44,  1.04s/it]

Training_loss 16.01188


 55%|█████▍    | 1091/2000 [22:41<15:05,  1.00it/s]

Training_loss 16.02474


 55%|█████▍    | 1092/2000 [22:42<15:08,  1.00s/it]

Training_loss 16.01345


 55%|█████▍    | 1093/2000 [22:43<14:55,  1.01it/s]

Training_loss 16.02322


 55%|█████▍    | 1094/2000 [22:44<14:44,  1.02it/s]

Training_loss 16.03601


 55%|█████▍    | 1095/2000 [22:45<14:30,  1.04it/s]

Training_loss 16.01029


 55%|█████▍    | 1096/2000 [22:46<14:03,  1.07it/s]

Training_loss 15.99713


 55%|█████▍    | 1097/2000 [22:47<14:09,  1.06it/s]

Training_loss 15.97965


 55%|█████▍    | 1098/2000 [22:48<13:54,  1.08it/s]

Training_loss 16.00351


 55%|█████▍    | 1099/2000 [22:49<13:51,  1.08it/s]

Training_loss 15.98935


 55%|█████▌    | 1100/2000 [22:50<13:41,  1.10it/s]

Training_loss 15.98716


 55%|█████▌    | 1101/2000 [22:51<13:50,  1.08it/s]

Training_loss 16.01406


 55%|█████▌    | 1102/2000 [22:52<14:03,  1.07it/s]

Training_loss 16.02194


 55%|█████▌    | 1103/2000 [22:53<14:56,  1.00it/s]

Training_loss 16.00322


 55%|█████▌    | 1104/2000 [22:54<16:03,  1.07s/it]

Training_loss 15.99770


 55%|█████▌    | 1105/2000 [22:55<18:10,  1.22s/it]

Training_loss 15.98481


 55%|█████▌    | 1106/2000 [22:57<18:29,  1.24s/it]

Training_loss 15.99332


 55%|█████▌    | 1107/2000 [22:58<17:35,  1.18s/it]

Training_loss 15.97742


 55%|█████▌    | 1108/2000 [22:59<16:32,  1.11s/it]

Training_loss 16.00630


 55%|█████▌    | 1109/2000 [23:00<15:37,  1.05s/it]

Training_loss 15.99734


 56%|█████▌    | 1110/2000 [23:01<15:12,  1.03s/it]

Training_loss 16.00469


 56%|█████▌    | 1111/2000 [23:02<15:19,  1.03s/it]

Training_loss 16.01122


 56%|█████▌    | 1112/2000 [23:03<14:35,  1.01it/s]

Training_loss 16.01438


 56%|█████▌    | 1113/2000 [23:03<14:17,  1.03it/s]

Training_loss 16.01824


 56%|█████▌    | 1114/2000 [23:04<13:50,  1.07it/s]

Training_loss 16.02680


 56%|█████▌    | 1115/2000 [23:05<13:56,  1.06it/s]

Training_loss 16.00593


 56%|█████▌    | 1116/2000 [23:06<13:34,  1.09it/s]

Training_loss 16.02884


 56%|█████▌    | 1117/2000 [23:07<14:04,  1.05it/s]

Training_loss 16.04230


 56%|█████▌    | 1118/2000 [23:08<14:42,  1.00s/it]

Training_loss 16.04872


 56%|█████▌    | 1119/2000 [23:09<14:41,  1.00s/it]

Training_loss 16.05308


 56%|█████▌    | 1120/2000 [23:10<14:48,  1.01s/it]

Training_loss 16.07301


 56%|█████▌    | 1121/2000 [23:12<15:34,  1.06s/it]

Training_loss 16.06105


 56%|█████▌    | 1122/2000 [23:13<15:26,  1.06s/it]

Training_loss 16.04918


 56%|█████▌    | 1123/2000 [23:14<14:46,  1.01s/it]

Training_loss 16.03265


 56%|█████▌    | 1124/2000 [23:14<14:31,  1.01it/s]

Training_loss 16.05128


 56%|█████▋    | 1125/2000 [23:15<14:22,  1.01it/s]

Training_loss 16.02950


 56%|█████▋    | 1126/2000 [23:16<14:33,  1.00it/s]

Training_loss 16.01844


 56%|█████▋    | 1127/2000 [23:17<14:37,  1.01s/it]

Training_loss 16.03696


 56%|█████▋    | 1128/2000 [23:18<14:37,  1.01s/it]

Training_loss 16.07193


 56%|█████▋    | 1129/2000 [23:19<14:01,  1.03it/s]

Training_loss 16.06168


 56%|█████▋    | 1130/2000 [23:20<13:54,  1.04it/s]

Training_loss 16.05314


 57%|█████▋    | 1131/2000 [23:21<13:41,  1.06it/s]

Training_loss 16.05594


 57%|█████▋    | 1132/2000 [23:22<13:41,  1.06it/s]

Training_loss 16.03359


 57%|█████▋    | 1133/2000 [23:23<13:44,  1.05it/s]

Training_loss 16.05022


 57%|█████▋    | 1134/2000 [23:24<13:55,  1.04it/s]

Training_loss 16.06560


 57%|█████▋    | 1135/2000 [23:25<13:52,  1.04it/s]

Training_loss 16.06393


 57%|█████▋    | 1136/2000 [23:26<13:59,  1.03it/s]

Training_loss 16.05544


 57%|█████▋    | 1137/2000 [23:27<15:24,  1.07s/it]

Training_loss 16.04178


 57%|█████▋    | 1138/2000 [23:28<15:09,  1.05s/it]

Training_loss 16.03996


 57%|█████▋    | 1139/2000 [23:29<14:31,  1.01s/it]

Training_loss 16.07071


 57%|█████▋    | 1140/2000 [23:30<14:02,  1.02it/s]

Training_loss 16.05406


 57%|█████▋    | 1141/2000 [23:31<13:40,  1.05it/s]

Training_loss 16.05539


 57%|█████▋    | 1142/2000 [23:32<13:55,  1.03it/s]

Training_loss 16.05315


 57%|█████▋    | 1143/2000 [23:33<13:25,  1.06it/s]

Training_loss 16.02753


 57%|█████▋    | 1144/2000 [23:34<13:24,  1.06it/s]

Training_loss 16.03061


 57%|█████▋    | 1145/2000 [23:35<13:33,  1.05it/s]

Training_loss 15.98702


 57%|█████▋    | 1146/2000 [23:36<13:29,  1.06it/s]

Training_loss 16.00460


 57%|█████▋    | 1147/2000 [23:37<13:16,  1.07it/s]

Training_loss 16.00525


 57%|█████▋    | 1148/2000 [23:38<13:26,  1.06it/s]

Training_loss 15.97534


 57%|█████▋    | 1149/2000 [23:39<13:35,  1.04it/s]

Training_loss 15.98852


 57%|█████▊    | 1150/2000 [23:40<13:25,  1.06it/s]

Training_loss 16.00698


 58%|█████▊    | 1151/2000 [23:41<13:24,  1.06it/s]

Training_loss 16.00208


 58%|█████▊    | 1152/2000 [23:41<13:13,  1.07it/s]

Training_loss 16.00986


 58%|█████▊    | 1153/2000 [23:43<14:02,  1.01it/s]

Training_loss 16.01268


 58%|█████▊    | 1154/2000 [23:44<14:35,  1.03s/it]

Training_loss 16.02554


 58%|█████▊    | 1155/2000 [23:45<14:21,  1.02s/it]

Training_loss 16.01367


 58%|█████▊    | 1156/2000 [23:46<14:11,  1.01s/it]

Training_loss 16.02173


 58%|█████▊    | 1157/2000 [23:47<13:49,  1.02it/s]

Training_loss 16.02696


 58%|█████▊    | 1158/2000 [23:48<13:48,  1.02it/s]

Training_loss 16.00287


 58%|█████▊    | 1159/2000 [23:49<13:28,  1.04it/s]

Training_loss 16.01508


 58%|█████▊    | 1160/2000 [23:49<13:12,  1.06it/s]

Training_loss 16.02022


 58%|█████▊    | 1161/2000 [23:50<13:04,  1.07it/s]

Training_loss 16.00920


 58%|█████▊    | 1162/2000 [23:51<12:52,  1.08it/s]

Training_loss 16.01006


 58%|█████▊    | 1163/2000 [23:52<13:06,  1.06it/s]

Training_loss 16.02034


 58%|█████▊    | 1164/2000 [23:53<13:43,  1.02it/s]

Training_loss 16.01821


 58%|█████▊    | 1165/2000 [23:55<16:25,  1.18s/it]

Training_loss 16.01760


 58%|█████▊    | 1166/2000 [23:57<18:08,  1.31s/it]

Training_loss 16.03885


 58%|█████▊    | 1167/2000 [23:58<19:04,  1.37s/it]

Training_loss 16.05442


 58%|█████▊    | 1168/2000 [24:00<20:00,  1.44s/it]

Training_loss 16.05065


 58%|█████▊    | 1169/2000 [24:01<20:22,  1.47s/it]

Training_loss 16.04263


 58%|█████▊    | 1170/2000 [24:02<18:51,  1.36s/it]

Training_loss 16.02655


 59%|█████▊    | 1171/2000 [24:03<17:18,  1.25s/it]

Training_loss 16.03844


 59%|█████▊    | 1172/2000 [24:04<15:56,  1.15s/it]

Training_loss 16.03767


 59%|█████▊    | 1173/2000 [24:05<14:48,  1.07s/it]

Training_loss 16.03095


 59%|█████▊    | 1174/2000 [24:06<14:10,  1.03s/it]

Training_loss 16.04009


 59%|█████▉    | 1175/2000 [24:07<13:34,  1.01it/s]

Training_loss 16.01038


 59%|█████▉    | 1176/2000 [24:08<13:31,  1.02it/s]

Training_loss 15.99912


 59%|█████▉    | 1177/2000 [24:09<14:08,  1.03s/it]

Training_loss 15.99485


 59%|█████▉    | 1178/2000 [24:10<14:45,  1.08s/it]

Training_loss 16.00037


 59%|█████▉    | 1179/2000 [24:11<15:07,  1.11s/it]

Training_loss 15.98811


 59%|█████▉    | 1180/2000 [24:12<14:51,  1.09s/it]

Training_loss 15.98434


 59%|█████▉    | 1181/2000 [24:13<14:30,  1.06s/it]

Training_loss 16.01019


 59%|█████▉    | 1182/2000 [24:15<15:26,  1.13s/it]

Training_loss 15.98931


 59%|█████▉    | 1183/2000 [24:16<15:11,  1.12s/it]

Training_loss 15.98563


 59%|█████▉    | 1184/2000 [24:17<14:34,  1.07s/it]

Training_loss 16.00513


 59%|█████▉    | 1185/2000 [24:18<14:23,  1.06s/it]

Training_loss 15.99024


 59%|█████▉    | 1186/2000 [24:19<14:06,  1.04s/it]

Training_loss 15.98911


 59%|█████▉    | 1187/2000 [24:20<15:45,  1.16s/it]

Training_loss 15.98101


 59%|█████▉    | 1188/2000 [24:22<16:24,  1.21s/it]

Training_loss 15.98443


 59%|█████▉    | 1189/2000 [24:23<15:18,  1.13s/it]

Training_loss 15.99492


 60%|█████▉    | 1190/2000 [24:24<14:54,  1.10s/it]

Training_loss 15.98595


 60%|█████▉    | 1191/2000 [24:24<14:02,  1.04s/it]

Training_loss 15.96240


 60%|█████▉    | 1192/2000 [24:25<13:33,  1.01s/it]

Training_loss 15.93805


 60%|█████▉    | 1193/2000 [24:26<13:15,  1.01it/s]

Training_loss 15.95144


 60%|█████▉    | 1194/2000 [24:27<13:06,  1.02it/s]

Training_loss 15.94201


 60%|█████▉    | 1195/2000 [24:28<12:50,  1.04it/s]

Training_loss 15.93090


 60%|█████▉    | 1196/2000 [24:29<12:39,  1.06it/s]

Training_loss 15.92115


 60%|█████▉    | 1197/2000 [24:30<12:58,  1.03it/s]

Training_loss 15.91491


 60%|█████▉    | 1198/2000 [24:31<13:46,  1.03s/it]

Training_loss 15.90518


 60%|█████▉    | 1199/2000 [24:32<13:32,  1.01s/it]

Training_loss 15.89030


 60%|██████    | 1200/2000 [24:33<13:00,  1.02it/s]

Training_loss 15.88290


 60%|██████    | 1201/2000 [24:34<12:40,  1.05it/s]

Training_loss 15.85337


 60%|██████    | 1202/2000 [24:35<12:22,  1.07it/s]

Training_loss 15.83242


 60%|██████    | 1203/2000 [24:36<12:36,  1.05it/s]

Training_loss 15.84686


 60%|██████    | 1204/2000 [24:37<12:36,  1.05it/s]

Training_loss 15.86573


 60%|██████    | 1205/2000 [24:38<12:34,  1.05it/s]

Training_loss 15.87069


 60%|██████    | 1206/2000 [24:39<12:26,  1.06it/s]

Training_loss 15.85784


 60%|██████    | 1207/2000 [24:40<12:06,  1.09it/s]

Training_loss 15.84648


 60%|██████    | 1208/2000 [24:41<12:10,  1.08it/s]

Training_loss 15.86065


 60%|██████    | 1209/2000 [24:41<11:54,  1.11it/s]

Training_loss 15.85670


 60%|██████    | 1210/2000 [24:42<12:26,  1.06it/s]

Training_loss 15.87949


 61%|██████    | 1211/2000 [24:43<12:31,  1.05it/s]

Training_loss 15.90153


 61%|██████    | 1212/2000 [24:44<12:24,  1.06it/s]

Training_loss 15.89665


 61%|██████    | 1213/2000 [24:45<12:16,  1.07it/s]

Training_loss 15.90745


 61%|██████    | 1214/2000 [24:46<12:46,  1.03it/s]

Training_loss 15.90241


 61%|██████    | 1215/2000 [24:48<13:53,  1.06s/it]

Training_loss 15.89346


 61%|██████    | 1216/2000 [24:49<13:30,  1.03s/it]

Training_loss 15.90321


 61%|██████    | 1217/2000 [24:49<12:49,  1.02it/s]

Training_loss 15.90916


 61%|██████    | 1218/2000 [24:50<12:37,  1.03it/s]

Training_loss 15.92854


 61%|██████    | 1219/2000 [24:51<12:26,  1.05it/s]

Training_loss 15.91789


 61%|██████    | 1220/2000 [24:52<12:16,  1.06it/s]

Training_loss 15.92069


 61%|██████    | 1221/2000 [24:53<12:16,  1.06it/s]

Training_loss 15.93085


 61%|██████    | 1222/2000 [24:54<12:51,  1.01it/s]

Training_loss 15.96299


 61%|██████    | 1223/2000 [24:55<12:37,  1.03it/s]

Training_loss 15.96108


 61%|██████    | 1224/2000 [24:56<12:41,  1.02it/s]

Training_loss 15.96832


 61%|██████▏   | 1225/2000 [24:57<12:49,  1.01it/s]

Training_loss 15.94331


 61%|██████▏   | 1226/2000 [24:58<13:00,  1.01s/it]

Training_loss 15.98027


 61%|██████▏   | 1227/2000 [24:59<12:32,  1.03it/s]

Training_loss 16.00315


 61%|██████▏   | 1228/2000 [25:00<12:12,  1.05it/s]

Training_loss 16.02836


 61%|██████▏   | 1229/2000 [25:01<12:04,  1.06it/s]

Training_loss 16.04537


 62%|██████▏   | 1230/2000 [25:02<11:59,  1.07it/s]

Training_loss 16.03731


 62%|██████▏   | 1231/2000 [25:03<13:11,  1.03s/it]

Training_loss 16.04022


 62%|██████▏   | 1232/2000 [25:04<12:49,  1.00s/it]

Training_loss 16.04432


 62%|██████▏   | 1233/2000 [25:05<12:31,  1.02it/s]

Training_loss 16.05927


 62%|██████▏   | 1234/2000 [25:06<12:09,  1.05it/s]

Training_loss 16.06501


 62%|██████▏   | 1235/2000 [25:07<12:00,  1.06it/s]

Training_loss 16.06793


 62%|██████▏   | 1236/2000 [25:08<12:38,  1.01it/s]

Training_loss 16.07389


 62%|██████▏   | 1237/2000 [25:09<13:20,  1.05s/it]

Training_loss 16.07321


 62%|██████▏   | 1238/2000 [25:10<12:57,  1.02s/it]

Training_loss 16.07167


 62%|██████▏   | 1239/2000 [25:11<12:37,  1.01it/s]

Training_loss 16.09664


 62%|██████▏   | 1240/2000 [25:12<12:08,  1.04it/s]

Training_loss 16.10565


 62%|██████▏   | 1241/2000 [25:13<12:05,  1.05it/s]

Training_loss 16.09012


 62%|██████▏   | 1242/2000 [25:14<11:43,  1.08it/s]

Training_loss 16.10601


 62%|██████▏   | 1243/2000 [25:15<11:50,  1.07it/s]

Training_loss 16.11463


 62%|██████▏   | 1244/2000 [25:16<11:46,  1.07it/s]

Training_loss 16.11722


 62%|██████▏   | 1245/2000 [25:16<11:29,  1.09it/s]

Training_loss 16.10644


 62%|██████▏   | 1246/2000 [25:17<11:34,  1.09it/s]

Training_loss 16.12634


 62%|██████▏   | 1247/2000 [25:19<12:38,  1.01s/it]

Training_loss 16.14028


 62%|██████▏   | 1248/2000 [25:20<13:28,  1.08s/it]

Training_loss 16.15308


 62%|██████▏   | 1249/2000 [25:21<13:07,  1.05s/it]

Training_loss 16.16573


 62%|██████▎   | 1250/2000 [25:22<12:35,  1.01s/it]

Training_loss 16.18707


 63%|██████▎   | 1251/2000 [25:23<12:31,  1.00s/it]

Training_loss 16.19200


 63%|██████▎   | 1252/2000 [25:24<12:18,  1.01it/s]

Training_loss 16.18581


 63%|██████▎   | 1253/2000 [25:25<12:13,  1.02it/s]

Training_loss 16.18952


 63%|██████▎   | 1254/2000 [25:26<11:53,  1.05it/s]

Training_loss 16.18171


 63%|██████▎   | 1255/2000 [25:26<11:34,  1.07it/s]

Training_loss 16.16696


 63%|██████▎   | 1256/2000 [25:27<11:37,  1.07it/s]

Training_loss 16.17123


 63%|██████▎   | 1257/2000 [25:28<11:35,  1.07it/s]

Training_loss 16.14938


 63%|██████▎   | 1258/2000 [25:29<11:30,  1.07it/s]

Training_loss 16.16106


 63%|██████▎   | 1259/2000 [25:30<11:18,  1.09it/s]

Training_loss 16.17013


 63%|██████▎   | 1260/2000 [25:31<11:14,  1.10it/s]

Training_loss 16.19128


 63%|██████▎   | 1261/2000 [25:32<11:18,  1.09it/s]

Training_loss 16.18425


 63%|██████▎   | 1262/2000 [25:33<11:02,  1.11it/s]

Training_loss 16.20950


 63%|██████▎   | 1263/2000 [25:34<11:26,  1.07it/s]

Training_loss 16.23103


 63%|██████▎   | 1264/2000 [25:35<12:25,  1.01s/it]

Training_loss 16.22938


 63%|██████▎   | 1265/2000 [25:36<12:29,  1.02s/it]

Training_loss 16.23839


 63%|██████▎   | 1266/2000 [25:37<12:32,  1.03s/it]

Training_loss 16.21288


 63%|██████▎   | 1267/2000 [25:38<12:45,  1.04s/it]

Training_loss 16.23131


 63%|██████▎   | 1268/2000 [25:39<12:38,  1.04s/it]

Training_loss 16.23720


 63%|██████▎   | 1269/2000 [25:40<12:30,  1.03s/it]

Training_loss 16.24282


 64%|██████▎   | 1270/2000 [25:41<12:29,  1.03s/it]

Training_loss 16.23136


 64%|██████▎   | 1271/2000 [25:42<12:22,  1.02s/it]

Training_loss 16.22768


 64%|██████▎   | 1272/2000 [25:43<12:26,  1.03s/it]

Training_loss 16.22242


 64%|██████▎   | 1273/2000 [25:44<12:14,  1.01s/it]

Training_loss 16.22819


 64%|██████▎   | 1274/2000 [25:45<11:57,  1.01it/s]

Training_loss 16.25317


 64%|██████▍   | 1275/2000 [25:46<11:28,  1.05it/s]

Training_loss 16.27568


 64%|██████▍   | 1276/2000 [25:47<11:18,  1.07it/s]

Training_loss 16.29098


 64%|██████▍   | 1277/2000 [25:48<11:19,  1.06it/s]

Training_loss 16.25773


 64%|██████▍   | 1278/2000 [25:49<11:16,  1.07it/s]

Training_loss 16.24070


 64%|██████▍   | 1279/2000 [25:50<11:12,  1.07it/s]

Training_loss 16.25077


 64%|██████▍   | 1280/2000 [25:51<12:07,  1.01s/it]

Training_loss 16.26710


 64%|██████▍   | 1281/2000 [25:52<12:08,  1.01s/it]

Training_loss 16.27411


 64%|██████▍   | 1282/2000 [25:53<11:55,  1.00it/s]

Training_loss 16.25625


 64%|██████▍   | 1283/2000 [25:54<11:37,  1.03it/s]

Training_loss 16.23091


 64%|██████▍   | 1284/2000 [25:55<11:33,  1.03it/s]

Training_loss 16.24137


 64%|██████▍   | 1285/2000 [25:56<11:30,  1.04it/s]

Training_loss 16.26332


 64%|██████▍   | 1286/2000 [25:57<11:26,  1.04it/s]

Training_loss 16.26553


 64%|██████▍   | 1287/2000 [25:58<11:26,  1.04it/s]

Training_loss 16.27655


 64%|██████▍   | 1288/2000 [25:59<11:20,  1.05it/s]

Training_loss 16.28905


 64%|██████▍   | 1289/2000 [26:00<11:15,  1.05it/s]

Training_loss 16.26219


 64%|██████▍   | 1290/2000 [26:00<11:03,  1.07it/s]

Training_loss 16.24871


 65%|██████▍   | 1291/2000 [26:01<11:07,  1.06it/s]

Training_loss 16.24925


 65%|██████▍   | 1292/2000 [26:02<11:13,  1.05it/s]

Training_loss 16.23806


 65%|██████▍   | 1293/2000 [26:03<11:25,  1.03it/s]

Training_loss 16.23465


 65%|██████▍   | 1294/2000 [26:04<11:24,  1.03it/s]

Training_loss 16.20654


 65%|██████▍   | 1295/2000 [26:05<11:13,  1.05it/s]

Training_loss 16.20931


 65%|██████▍   | 1296/2000 [26:06<11:20,  1.03it/s]

Training_loss 16.20285


 65%|██████▍   | 1297/2000 [26:07<12:07,  1.03s/it]

Training_loss 16.19785


 65%|██████▍   | 1298/2000 [26:09<12:58,  1.11s/it]

Training_loss 16.17610


 65%|██████▍   | 1299/2000 [26:10<13:48,  1.18s/it]

Training_loss 16.16486


 65%|██████▌   | 1300/2000 [26:11<13:44,  1.18s/it]

Training_loss 16.12993


 65%|██████▌   | 1301/2000 [26:12<13:01,  1.12s/it]

Training_loss 16.14917


 65%|██████▌   | 1302/2000 [26:13<12:43,  1.09s/it]

Training_loss 16.15510


 65%|██████▌   | 1303/2000 [26:14<12:35,  1.08s/it]

Training_loss 16.16744


 65%|██████▌   | 1304/2000 [26:15<12:21,  1.07s/it]

Training_loss 16.16502


 65%|██████▌   | 1305/2000 [26:16<11:59,  1.03s/it]

Training_loss 16.15934


 65%|██████▌   | 1306/2000 [26:17<11:40,  1.01s/it]

Training_loss 16.18981


 65%|██████▌   | 1307/2000 [26:18<11:30,  1.00it/s]

Training_loss 16.20595


 65%|██████▌   | 1308/2000 [26:19<11:40,  1.01s/it]

Training_loss 16.20047


 65%|██████▌   | 1309/2000 [26:20<11:42,  1.02s/it]

Training_loss 16.18926


 66%|██████▌   | 1310/2000 [26:21<11:31,  1.00s/it]

Training_loss 16.19515


 66%|██████▌   | 1311/2000 [26:22<11:48,  1.03s/it]

Training_loss 16.18205


 66%|██████▌   | 1312/2000 [26:24<12:42,  1.11s/it]

Training_loss 16.18132


 66%|██████▌   | 1313/2000 [26:25<12:24,  1.08s/it]

Training_loss 16.18327


 66%|██████▌   | 1314/2000 [26:26<12:05,  1.06s/it]

Training_loss 16.19700


 66%|██████▌   | 1315/2000 [26:27<11:38,  1.02s/it]

Training_loss 16.22009


 66%|██████▌   | 1316/2000 [26:28<11:23,  1.00it/s]

Training_loss 16.22015


 66%|██████▌   | 1317/2000 [26:29<11:18,  1.01it/s]

Training_loss 16.20361


 66%|██████▌   | 1318/2000 [26:29<11:05,  1.03it/s]

Training_loss 16.18655


 66%|██████▌   | 1319/2000 [26:30<10:57,  1.04it/s]

Training_loss 16.18284


 66%|██████▌   | 1320/2000 [26:31<10:36,  1.07it/s]

Training_loss 16.14979


 66%|██████▌   | 1321/2000 [26:32<10:38,  1.06it/s]

Training_loss 16.13238


 66%|██████▌   | 1322/2000 [26:33<10:35,  1.07it/s]

Training_loss 16.13025


 66%|██████▌   | 1323/2000 [26:34<10:28,  1.08it/s]

Training_loss 16.13670


 66%|██████▌   | 1324/2000 [26:35<10:24,  1.08it/s]

Training_loss 16.12614


 66%|██████▋   | 1325/2000 [26:36<10:16,  1.10it/s]

Training_loss 16.13876


 66%|██████▋   | 1326/2000 [26:37<10:26,  1.08it/s]

Training_loss 16.14394


 66%|██████▋   | 1327/2000 [26:38<10:20,  1.09it/s]

Training_loss 16.15045


 66%|██████▋   | 1328/2000 [26:39<11:39,  1.04s/it]

Training_loss 16.12499


 66%|██████▋   | 1329/2000 [26:40<11:33,  1.03s/it]

Training_loss 16.12296


 66%|██████▋   | 1330/2000 [26:41<11:08,  1.00it/s]

Training_loss 16.13389


 67%|██████▋   | 1331/2000 [26:42<11:09,  1.00s/it]

Training_loss 16.13250


 67%|██████▋   | 1332/2000 [26:43<11:27,  1.03s/it]

Training_loss 16.13728


 67%|██████▋   | 1333/2000 [26:44<11:16,  1.01s/it]

Training_loss 16.14017


 67%|██████▋   | 1334/2000 [26:45<10:59,  1.01it/s]

Training_loss 16.10815


 67%|██████▋   | 1335/2000 [26:46<10:55,  1.01it/s]

Training_loss 16.11054


 67%|██████▋   | 1336/2000 [26:47<10:45,  1.03it/s]

Training_loss 16.11930


 67%|██████▋   | 1337/2000 [26:48<10:41,  1.03it/s]

Training_loss 16.12327


 67%|██████▋   | 1338/2000 [26:49<10:40,  1.03it/s]

Training_loss 16.11209


 67%|██████▋   | 1339/2000 [26:50<10:38,  1.04it/s]

Training_loss 16.10719


 67%|██████▋   | 1340/2000 [26:51<10:24,  1.06it/s]

Training_loss 16.09115


 67%|██████▋   | 1341/2000 [26:52<10:10,  1.08it/s]

Training_loss 16.09146


 67%|██████▋   | 1342/2000 [26:53<10:04,  1.09it/s]

Training_loss 16.10989


 67%|██████▋   | 1343/2000 [26:53<10:12,  1.07it/s]

Training_loss 16.11025


 67%|██████▋   | 1344/2000 [26:55<10:45,  1.02it/s]

Training_loss 16.12347


 67%|██████▋   | 1345/2000 [26:56<11:26,  1.05s/it]

Training_loss 16.13838


 67%|██████▋   | 1346/2000 [26:57<11:13,  1.03s/it]

Training_loss 16.13578


 67%|██████▋   | 1347/2000 [26:58<10:55,  1.00s/it]

Training_loss 16.15481


 67%|██████▋   | 1348/2000 [26:59<10:40,  1.02it/s]

Training_loss 16.17865


 67%|██████▋   | 1349/2000 [27:00<10:29,  1.03it/s]

Training_loss 16.17499


 68%|██████▊   | 1350/2000 [27:00<10:10,  1.07it/s]

Training_loss 16.18045


 68%|██████▊   | 1351/2000 [27:01<10:05,  1.07it/s]

Training_loss 16.20749


 68%|██████▊   | 1352/2000 [27:02<09:54,  1.09it/s]

Training_loss 16.21757


 68%|██████▊   | 1353/2000 [27:03<09:49,  1.10it/s]

Training_loss 16.22449


 68%|██████▊   | 1354/2000 [27:04<09:56,  1.08it/s]

Training_loss 16.21275


 68%|██████▊   | 1355/2000 [27:05<09:43,  1.11it/s]

Training_loss 16.19663


 68%|██████▊   | 1356/2000 [27:06<09:37,  1.11it/s]

Training_loss 16.21466


 68%|██████▊   | 1357/2000 [27:07<09:52,  1.09it/s]

Training_loss 16.22126


 68%|██████▊   | 1358/2000 [27:08<09:45,  1.10it/s]

Training_loss 16.23193


 68%|██████▊   | 1359/2000 [27:09<10:14,  1.04it/s]

Training_loss 16.23688


 68%|██████▊   | 1360/2000 [27:10<10:53,  1.02s/it]

Training_loss 16.24335


 68%|██████▊   | 1361/2000 [27:11<11:52,  1.11s/it]

Training_loss 16.24474


 68%|██████▊   | 1362/2000 [27:12<11:18,  1.06s/it]

Training_loss 16.25713


 68%|██████▊   | 1363/2000 [27:13<10:58,  1.03s/it]

Training_loss 16.25929


 68%|██████▊   | 1364/2000 [27:14<10:54,  1.03s/it]

Training_loss 16.27440


 68%|██████▊   | 1365/2000 [27:15<10:35,  1.00s/it]

Training_loss 16.28142


 68%|██████▊   | 1366/2000 [27:16<10:09,  1.04it/s]

Training_loss 16.27711


 68%|██████▊   | 1367/2000 [27:17<10:21,  1.02it/s]

Training_loss 16.27461


 68%|██████▊   | 1368/2000 [27:18<10:10,  1.04it/s]

Training_loss 16.26322


 68%|██████▊   | 1369/2000 [27:19<10:05,  1.04it/s]

Training_loss 16.28393


 68%|██████▊   | 1370/2000 [27:20<10:18,  1.02it/s]

Training_loss 16.30734


 69%|██████▊   | 1371/2000 [27:21<10:06,  1.04it/s]

Training_loss 16.30155


 69%|██████▊   | 1372/2000 [27:22<10:06,  1.04it/s]

Training_loss 16.29223


 69%|██████▊   | 1373/2000 [27:23<10:12,  1.02it/s]

Training_loss 16.32610


 69%|██████▊   | 1374/2000 [27:24<09:53,  1.06it/s]

Training_loss 16.33257


 69%|██████▉   | 1375/2000 [27:25<09:41,  1.07it/s]

Training_loss 16.33982


 69%|██████▉   | 1376/2000 [27:26<09:40,  1.07it/s]

Training_loss 16.30955


 69%|██████▉   | 1377/2000 [27:27<10:18,  1.01it/s]

Training_loss 16.31461


 69%|██████▉   | 1378/2000 [27:28<10:33,  1.02s/it]

Training_loss 16.29224


 69%|██████▉   | 1379/2000 [27:29<10:24,  1.01s/it]

Training_loss 16.25284


 69%|██████▉   | 1380/2000 [27:30<09:58,  1.04it/s]

Training_loss 16.24934


 69%|██████▉   | 1381/2000 [27:31<09:55,  1.04it/s]

Training_loss 16.23196


 69%|██████▉   | 1382/2000 [27:31<09:38,  1.07it/s]

Training_loss 16.21495


 69%|██████▉   | 1383/2000 [27:32<09:25,  1.09it/s]

Training_loss 16.23631


 69%|██████▉   | 1384/2000 [27:33<09:20,  1.10it/s]

Training_loss 16.25978


 69%|██████▉   | 1385/2000 [27:34<09:40,  1.06it/s]

Training_loss 16.26114


 69%|██████▉   | 1386/2000 [27:35<09:42,  1.05it/s]

Training_loss 16.27821


 69%|██████▉   | 1387/2000 [27:36<09:34,  1.07it/s]

Training_loss 16.25840


 69%|██████▉   | 1388/2000 [27:37<09:29,  1.07it/s]

Training_loss 16.25264


 69%|██████▉   | 1389/2000 [27:38<09:44,  1.04it/s]

Training_loss 16.23848


 70%|██████▉   | 1390/2000 [27:39<09:34,  1.06it/s]

Training_loss 16.24483


 70%|██████▉   | 1391/2000 [27:40<09:31,  1.07it/s]

Training_loss 16.24687


 70%|██████▉   | 1392/2000 [27:41<09:30,  1.07it/s]

Training_loss 16.25877


 70%|██████▉   | 1393/2000 [27:42<09:47,  1.03it/s]

Training_loss 16.26630


 70%|██████▉   | 1394/2000 [27:43<10:32,  1.04s/it]

Training_loss 16.30888


 70%|██████▉   | 1395/2000 [27:44<10:39,  1.06s/it]

Training_loss 16.33107


 70%|██████▉   | 1396/2000 [27:45<10:16,  1.02s/it]

Training_loss 16.34091


 70%|██████▉   | 1397/2000 [27:46<10:20,  1.03s/it]

Training_loss 16.36783


 70%|██████▉   | 1398/2000 [27:47<09:53,  1.01it/s]

Training_loss 16.37520


 70%|██████▉   | 1399/2000 [27:48<09:40,  1.04it/s]

Training_loss 16.37033


 70%|███████   | 1400/2000 [27:49<09:31,  1.05it/s]

Training_loss 16.36292


 70%|███████   | 1401/2000 [27:50<09:29,  1.05it/s]

Training_loss 16.33936


 70%|███████   | 1402/2000 [27:51<09:17,  1.07it/s]

Training_loss 16.33818


 70%|███████   | 1403/2000 [27:52<09:18,  1.07it/s]

Training_loss 16.34855


 70%|███████   | 1404/2000 [27:53<09:08,  1.09it/s]

Training_loss 16.32573


 70%|███████   | 1405/2000 [27:53<09:11,  1.08it/s]

Training_loss 16.34587


 70%|███████   | 1406/2000 [27:54<09:07,  1.08it/s]

Training_loss 16.36383


 70%|███████   | 1407/2000 [27:55<08:57,  1.10it/s]

Training_loss 16.37011


 70%|███████   | 1408/2000 [27:56<09:11,  1.07it/s]

Training_loss 16.40082


 70%|███████   | 1409/2000 [27:57<09:04,  1.09it/s]

Training_loss 16.43799


 70%|███████   | 1410/2000 [27:58<09:24,  1.04it/s]

Training_loss 16.41985


 71%|███████   | 1411/2000 [27:59<10:19,  1.05s/it]

Training_loss 16.41113


 71%|███████   | 1412/2000 [28:00<09:50,  1.00s/it]

Training_loss 16.43911


 71%|███████   | 1413/2000 [28:01<09:39,  1.01it/s]

Training_loss 16.42697


 71%|███████   | 1414/2000 [28:02<09:42,  1.01it/s]

Training_loss 16.40229


 71%|███████   | 1415/2000 [28:03<09:29,  1.03it/s]

Training_loss 16.40817


 71%|███████   | 1416/2000 [28:04<09:34,  1.02it/s]

Training_loss 16.40069


 71%|███████   | 1417/2000 [28:05<09:23,  1.03it/s]

Training_loss 16.39336


 71%|███████   | 1418/2000 [28:06<09:05,  1.07it/s]

Training_loss 16.38457


 71%|███████   | 1419/2000 [28:07<09:06,  1.06it/s]

Training_loss 16.35871


 71%|███████   | 1420/2000 [28:08<08:58,  1.08it/s]

Training_loss 16.35120


 71%|███████   | 1421/2000 [28:09<09:30,  1.01it/s]

Training_loss 16.35601


 71%|███████   | 1422/2000 [28:10<10:02,  1.04s/it]

Training_loss 16.33885


 71%|███████   | 1423/2000 [28:11<10:04,  1.05s/it]

Training_loss 16.32709


 71%|███████   | 1424/2000 [28:12<09:34,  1.00it/s]

Training_loss 16.34458


 71%|███████▏  | 1425/2000 [28:13<09:22,  1.02it/s]

Training_loss 16.32655


 71%|███████▏  | 1426/2000 [28:14<09:30,  1.01it/s]

Training_loss 16.33254


 71%|███████▏  | 1427/2000 [28:15<10:12,  1.07s/it]

Training_loss 16.32947


 71%|███████▏  | 1428/2000 [28:16<09:51,  1.03s/it]

Training_loss 16.30729


 71%|███████▏  | 1429/2000 [28:17<09:18,  1.02it/s]

Training_loss 16.30093


 72%|███████▏  | 1430/2000 [28:18<09:08,  1.04it/s]

Training_loss 16.28273


 72%|███████▏  | 1431/2000 [28:19<09:07,  1.04it/s]

Training_loss 16.29605


 72%|███████▏  | 1432/2000 [28:20<09:17,  1.02it/s]

Training_loss 16.31256


 72%|███████▏  | 1433/2000 [28:21<09:21,  1.01it/s]

Training_loss 16.32716


 72%|███████▏  | 1434/2000 [28:22<08:58,  1.05it/s]

Training_loss 16.33106


 72%|███████▏  | 1435/2000 [28:23<09:15,  1.02it/s]

Training_loss 16.29251


 72%|███████▏  | 1436/2000 [28:24<09:14,  1.02it/s]

Training_loss 16.27612


 72%|███████▏  | 1437/2000 [28:25<09:07,  1.03it/s]

Training_loss 16.29531


 72%|███████▏  | 1438/2000 [28:26<08:53,  1.05it/s]

Training_loss 16.29139


 72%|███████▏  | 1439/2000 [28:27<08:47,  1.06it/s]

Training_loss 16.31750


 72%|███████▏  | 1440/2000 [28:28<08:39,  1.08it/s]

Training_loss 16.31115


 72%|███████▏  | 1441/2000 [28:29<08:54,  1.05it/s]

Training_loss 16.30433


 72%|███████▏  | 1442/2000 [28:30<08:51,  1.05it/s]

Training_loss 16.28783


 72%|███████▏  | 1443/2000 [28:31<09:19,  1.00s/it]

Training_loss 16.25480


 72%|███████▏  | 1444/2000 [28:32<09:48,  1.06s/it]

Training_loss 16.26469


 72%|███████▏  | 1445/2000 [28:33<09:27,  1.02s/it]

Training_loss 16.27120


 72%|███████▏  | 1446/2000 [28:34<09:03,  1.02it/s]

Training_loss 16.28339


 72%|███████▏  | 1447/2000 [28:35<08:51,  1.04it/s]

Training_loss 16.30907


 72%|███████▏  | 1448/2000 [28:36<08:41,  1.06it/s]

Training_loss 16.31179


 72%|███████▏  | 1449/2000 [28:36<08:34,  1.07it/s]

Training_loss 16.31101


 72%|███████▎  | 1450/2000 [28:37<08:33,  1.07it/s]

Training_loss 16.31930


 73%|███████▎  | 1451/2000 [28:38<08:30,  1.08it/s]

Training_loss 16.32394


 73%|███████▎  | 1452/2000 [28:39<08:32,  1.07it/s]

Training_loss 16.33460


 73%|███████▎  | 1453/2000 [28:40<08:22,  1.09it/s]

Training_loss 16.34538


 73%|███████▎  | 1454/2000 [28:41<08:34,  1.06it/s]

Training_loss 16.33704


 73%|███████▎  | 1455/2000 [28:42<08:47,  1.03it/s]

Training_loss 16.33801


 73%|███████▎  | 1456/2000 [28:43<08:56,  1.01it/s]

Training_loss 16.35067


 73%|███████▎  | 1457/2000 [28:44<08:44,  1.04it/s]

Training_loss 16.35880


 73%|███████▎  | 1458/2000 [28:45<08:32,  1.06it/s]

Training_loss 16.32789


 73%|███████▎  | 1459/2000 [28:46<08:22,  1.08it/s]

Training_loss 16.32406


 73%|███████▎  | 1460/2000 [28:47<09:07,  1.01s/it]

Training_loss 16.30198


 73%|███████▎  | 1461/2000 [28:48<09:10,  1.02s/it]

Training_loss 16.28019


 73%|███████▎  | 1462/2000 [28:49<08:52,  1.01it/s]

Training_loss 16.26699


 73%|███████▎  | 1463/2000 [28:50<08:37,  1.04it/s]

Training_loss 16.28921


 73%|███████▎  | 1464/2000 [28:51<08:29,  1.05it/s]

Training_loss 16.29169


 73%|███████▎  | 1465/2000 [28:52<08:23,  1.06it/s]

Training_loss 16.29139


 73%|███████▎  | 1466/2000 [28:53<08:15,  1.08it/s]

Training_loss 16.29175


 73%|███████▎  | 1467/2000 [28:54<08:20,  1.06it/s]

Training_loss 16.29233


 73%|███████▎  | 1468/2000 [28:55<08:24,  1.05it/s]

Training_loss 16.30275


 73%|███████▎  | 1469/2000 [28:55<08:17,  1.07it/s]

Training_loss 16.30961


 74%|███████▎  | 1470/2000 [28:56<08:24,  1.05it/s]

Training_loss 16.30839


 74%|███████▎  | 1471/2000 [28:57<08:27,  1.04it/s]

Training_loss 16.31764


 74%|███████▎  | 1472/2000 [28:58<08:22,  1.05it/s]

Training_loss 16.31391


 74%|███████▎  | 1473/2000 [28:59<08:15,  1.06it/s]

Training_loss 16.29364


 74%|███████▎  | 1474/2000 [29:00<08:21,  1.05it/s]

Training_loss 16.30991


 74%|███████▍  | 1475/2000 [29:01<08:19,  1.05it/s]

Training_loss 16.35772


 74%|███████▍  | 1476/2000 [29:02<08:39,  1.01it/s]

Training_loss 16.31114


 74%|███████▍  | 1477/2000 [29:04<09:08,  1.05s/it]

Training_loss 16.30577


 74%|███████▍  | 1478/2000 [29:04<08:47,  1.01s/it]

Training_loss 16.34229


 74%|███████▍  | 1479/2000 [29:05<08:30,  1.02it/s]

Training_loss 16.32893


 74%|███████▍  | 1480/2000 [29:06<08:15,  1.05it/s]

Training_loss 16.30490


 74%|███████▍  | 1481/2000 [29:07<08:02,  1.07it/s]

Training_loss 16.28193


 74%|███████▍  | 1482/2000 [29:08<08:44,  1.01s/it]

Training_loss 16.26626


 74%|███████▍  | 1483/2000 [29:09<09:00,  1.05s/it]

Training_loss 16.27300


 74%|███████▍  | 1484/2000 [29:11<09:58,  1.16s/it]

Training_loss 16.25473


 74%|███████▍  | 1485/2000 [29:12<09:56,  1.16s/it]

Training_loss 16.24162


 74%|███████▍  | 1486/2000 [29:13<09:45,  1.14s/it]

Training_loss 16.28172


 74%|███████▍  | 1487/2000 [29:14<09:30,  1.11s/it]

Training_loss 16.26045


 74%|███████▍  | 1488/2000 [29:15<09:54,  1.16s/it]

Training_loss 16.24083


 74%|███████▍  | 1489/2000 [29:17<09:46,  1.15s/it]

Training_loss 16.24544


 74%|███████▍  | 1490/2000 [29:17<09:09,  1.08s/it]

Training_loss 16.25465


 75%|███████▍  | 1491/2000 [29:19<09:19,  1.10s/it]

Training_loss 16.22899


 75%|███████▍  | 1492/2000 [29:20<10:23,  1.23s/it]

Training_loss 16.19847


 75%|███████▍  | 1493/2000 [29:21<10:04,  1.19s/it]

Training_loss 16.19900


 75%|███████▍  | 1494/2000 [29:22<09:22,  1.11s/it]

Training_loss 16.19620


 75%|███████▍  | 1495/2000 [29:23<08:56,  1.06s/it]

Training_loss 16.18484


 75%|███████▍  | 1496/2000 [29:24<08:28,  1.01s/it]

Training_loss 16.18688


 75%|███████▍  | 1497/2000 [29:25<08:22,  1.00it/s]

Training_loss 16.22893


 75%|███████▍  | 1498/2000 [29:26<08:10,  1.02it/s]

Training_loss 16.20461


 75%|███████▍  | 1499/2000 [29:27<08:18,  1.00it/s]

Training_loss 16.20235


 75%|███████▌  | 1500/2000 [29:28<08:20,  1.00s/it]

Training_loss 16.22389


 75%|███████▌  | 1501/2000 [29:29<08:15,  1.01it/s]

Training_loss 16.26165


 75%|███████▌  | 1502/2000 [29:30<08:04,  1.03it/s]

Training_loss 16.24426


 75%|███████▌  | 1503/2000 [29:31<07:54,  1.05it/s]

Training_loss 16.27678


 75%|███████▌  | 1504/2000 [29:32<07:52,  1.05it/s]

Training_loss 16.29442


 75%|███████▌  | 1505/2000 [29:33<07:41,  1.07it/s]

Training_loss 16.30133


 75%|███████▌  | 1506/2000 [29:34<07:43,  1.07it/s]

Training_loss 16.30292


 75%|███████▌  | 1507/2000 [29:35<08:16,  1.01s/it]

Training_loss 16.32129


 75%|███████▌  | 1508/2000 [29:36<08:25,  1.03s/it]

Training_loss 16.31382


 75%|███████▌  | 1509/2000 [29:37<08:04,  1.01it/s]

Training_loss 16.31783


 76%|███████▌  | 1510/2000 [29:38<07:59,  1.02it/s]

Training_loss 16.30781


 76%|███████▌  | 1511/2000 [29:39<07:59,  1.02it/s]

Training_loss 16.32514


 76%|███████▌  | 1512/2000 [29:40<07:50,  1.04it/s]

Training_loss 16.31303


 76%|███████▌  | 1513/2000 [29:40<07:42,  1.05it/s]

Training_loss 16.29362


 76%|███████▌  | 1514/2000 [29:41<07:47,  1.04it/s]

Training_loss 16.32535


 76%|███████▌  | 1515/2000 [29:42<07:48,  1.03it/s]

Training_loss 16.35236


 76%|███████▌  | 1516/2000 [29:43<07:44,  1.04it/s]

Training_loss 16.35233


 76%|███████▌  | 1517/2000 [29:44<07:40,  1.05it/s]

Training_loss 16.36009


 76%|███████▌  | 1518/2000 [29:45<07:32,  1.06it/s]

Training_loss 16.35792


 76%|███████▌  | 1519/2000 [29:46<07:27,  1.08it/s]

Training_loss 16.36309


 76%|███████▌  | 1520/2000 [29:47<07:29,  1.07it/s]

Training_loss 16.35935


 76%|███████▌  | 1521/2000 [29:48<07:27,  1.07it/s]

Training_loss 16.37243


 76%|███████▌  | 1522/2000 [29:49<07:20,  1.09it/s]

Training_loss 16.38647


 76%|███████▌  | 1523/2000 [29:50<07:36,  1.04it/s]

Training_loss 16.39617


 76%|███████▌  | 1524/2000 [29:51<08:19,  1.05s/it]

Training_loss 16.37621


 76%|███████▋  | 1525/2000 [29:52<08:13,  1.04s/it]

Training_loss 16.40127


 76%|███████▋  | 1526/2000 [29:53<07:59,  1.01s/it]

Training_loss 16.41397


 76%|███████▋  | 1527/2000 [29:54<07:35,  1.04it/s]

Training_loss 16.43440


 76%|███████▋  | 1528/2000 [29:55<07:28,  1.05it/s]

Training_loss 16.45388


 76%|███████▋  | 1529/2000 [29:56<07:21,  1.07it/s]

Training_loss 16.45159


 76%|███████▋  | 1530/2000 [29:57<07:27,  1.05it/s]

Training_loss 16.47388


 77%|███████▋  | 1531/2000 [29:58<07:27,  1.05it/s]

Training_loss 16.49572


 77%|███████▋  | 1532/2000 [29:59<07:12,  1.08it/s]

Training_loss 16.48480


 77%|███████▋  | 1533/2000 [30:00<07:10,  1.09it/s]

Training_loss 16.52073


 77%|███████▋  | 1534/2000 [30:01<07:11,  1.08it/s]

Training_loss 16.52533


 77%|███████▋  | 1535/2000 [30:01<07:12,  1.08it/s]

Training_loss 16.50812


 77%|███████▋  | 1536/2000 [30:02<07:04,  1.09it/s]

Training_loss 16.52032


 77%|███████▋  | 1537/2000 [30:03<07:07,  1.08it/s]

Training_loss 16.52686


 77%|███████▋  | 1538/2000 [30:04<07:09,  1.08it/s]

Training_loss 16.53821


 77%|███████▋  | 1539/2000 [30:05<07:04,  1.09it/s]

Training_loss 16.53529


 77%|███████▋  | 1540/2000 [30:06<07:16,  1.05it/s]

Training_loss 16.54248


 77%|███████▋  | 1541/2000 [30:07<07:55,  1.04s/it]

Training_loss 16.53037


 77%|███████▋  | 1542/2000 [30:08<08:05,  1.06s/it]

Training_loss 16.53837


 77%|███████▋  | 1543/2000 [30:10<08:17,  1.09s/it]

Training_loss 16.56172


 77%|███████▋  | 1544/2000 [30:11<08:25,  1.11s/it]

Training_loss 16.57281


 77%|███████▋  | 1545/2000 [30:12<08:36,  1.14s/it]

Training_loss 16.58211


 77%|███████▋  | 1546/2000 [30:13<08:31,  1.13s/it]

Training_loss 16.58085


 77%|███████▋  | 1547/2000 [30:14<08:11,  1.09s/it]

Training_loss 16.56072


 77%|███████▋  | 1548/2000 [30:15<07:39,  1.02s/it]

Training_loss 16.59494


 77%|███████▋  | 1549/2000 [30:16<07:34,  1.01s/it]

Training_loss 16.58925


 78%|███████▊  | 1550/2000 [30:17<07:24,  1.01it/s]

Training_loss 16.57305


 78%|███████▊  | 1551/2000 [30:18<07:10,  1.04it/s]

Training_loss 16.57232


 78%|███████▊  | 1552/2000 [30:19<07:05,  1.05it/s]

Training_loss 16.59968


 78%|███████▊  | 1553/2000 [30:20<07:00,  1.06it/s]

Training_loss 16.59561


 78%|███████▊  | 1554/2000 [30:21<06:55,  1.07it/s]

Training_loss 16.59939


 78%|███████▊  | 1555/2000 [30:22<07:07,  1.04it/s]

Training_loss 16.60276


 78%|███████▊  | 1556/2000 [30:23<07:48,  1.06s/it]

Training_loss 16.60977


 78%|███████▊  | 1557/2000 [30:24<07:53,  1.07s/it]

Training_loss 16.58652


 78%|███████▊  | 1558/2000 [30:25<07:37,  1.03s/it]

Training_loss 16.55161


 78%|███████▊  | 1559/2000 [30:26<07:24,  1.01s/it]

Training_loss 16.53773


 78%|███████▊  | 1560/2000 [30:27<07:03,  1.04it/s]

Training_loss 16.53710


 78%|███████▊  | 1561/2000 [30:28<07:00,  1.04it/s]

Training_loss 16.51679


 78%|███████▊  | 1562/2000 [30:29<06:47,  1.07it/s]

Training_loss 16.50470


 78%|███████▊  | 1563/2000 [30:29<06:46,  1.08it/s]

Training_loss 16.50046


 78%|███████▊  | 1564/2000 [30:30<06:41,  1.09it/s]

Training_loss 16.52228


 78%|███████▊  | 1565/2000 [30:31<06:42,  1.08it/s]

Training_loss 16.50514


 78%|███████▊  | 1566/2000 [30:32<06:39,  1.09it/s]

Training_loss 16.49076


 78%|███████▊  | 1567/2000 [30:33<06:44,  1.07it/s]

Training_loss 16.49451


 78%|███████▊  | 1568/2000 [30:34<06:40,  1.08it/s]

Training_loss 16.48402


 78%|███████▊  | 1569/2000 [30:35<06:53,  1.04it/s]

Training_loss 16.51304


 78%|███████▊  | 1570/2000 [30:36<07:49,  1.09s/it]

Training_loss 16.52207


 79%|███████▊  | 1571/2000 [30:38<07:51,  1.10s/it]

Training_loss 16.52869


 79%|███████▊  | 1572/2000 [30:39<08:07,  1.14s/it]

Training_loss 16.53735


 79%|███████▊  | 1573/2000 [30:40<07:57,  1.12s/it]

Training_loss 16.55041


 79%|███████▊  | 1574/2000 [30:41<07:37,  1.07s/it]

Training_loss 16.54764


 79%|███████▉  | 1575/2000 [30:42<08:01,  1.13s/it]

Training_loss 16.56282


 79%|███████▉  | 1576/2000 [30:43<07:40,  1.09s/it]

Training_loss 16.58704


 79%|███████▉  | 1577/2000 [30:44<07:21,  1.04s/it]

Training_loss 16.59359


 79%|███████▉  | 1578/2000 [30:45<07:16,  1.03s/it]

Training_loss 16.58972


 79%|███████▉  | 1579/2000 [30:46<06:58,  1.01it/s]

Training_loss 16.59492


 79%|███████▉  | 1580/2000 [30:47<06:46,  1.03it/s]

Training_loss 16.62605


 79%|███████▉  | 1581/2000 [30:48<06:39,  1.05it/s]

Training_loss 16.61249


 79%|███████▉  | 1582/2000 [30:49<06:35,  1.06it/s]

Training_loss 16.61517


 79%|███████▉  | 1583/2000 [30:50<06:31,  1.07it/s]

Training_loss 16.61718


 79%|███████▉  | 1584/2000 [30:51<06:26,  1.08it/s]

Training_loss 16.60465


 79%|███████▉  | 1585/2000 [30:51<06:25,  1.08it/s]

Training_loss 16.60844


 79%|███████▉  | 1586/2000 [30:52<06:20,  1.09it/s]

Training_loss 16.60196


 79%|███████▉  | 1587/2000 [30:53<06:15,  1.10it/s]

Training_loss 16.61316


 79%|███████▉  | 1588/2000 [30:54<06:31,  1.05it/s]

Training_loss 16.63962


 79%|███████▉  | 1589/2000 [30:56<07:06,  1.04s/it]

Training_loss 16.65759


 80%|███████▉  | 1590/2000 [30:57<06:59,  1.02s/it]

Training_loss 16.65253


 80%|███████▉  | 1591/2000 [30:58<06:51,  1.01s/it]

Training_loss 16.65976


 80%|███████▉  | 1592/2000 [30:58<06:45,  1.01it/s]

Training_loss 16.66477


 80%|███████▉  | 1593/2000 [30:59<06:33,  1.03it/s]

Training_loss 16.69700


 80%|███████▉  | 1594/2000 [31:00<06:26,  1.05it/s]

Training_loss 16.68988


 80%|███████▉  | 1595/2000 [31:01<06:22,  1.06it/s]

Training_loss 16.70474


 80%|███████▉  | 1596/2000 [31:02<06:20,  1.06it/s]

Training_loss 16.71088


 80%|███████▉  | 1597/2000 [31:03<06:19,  1.06it/s]

Training_loss 16.70385


 80%|███████▉  | 1598/2000 [31:04<06:14,  1.07it/s]

Training_loss 16.71640


 80%|███████▉  | 1599/2000 [31:05<06:12,  1.08it/s]

Training_loss 16.68438


 80%|████████  | 1600/2000 [31:06<06:00,  1.11it/s]

Training_loss 16.68199


 80%|████████  | 1601/2000 [31:07<05:56,  1.12it/s]

Training_loss 16.68625


 80%|████████  | 1602/2000 [31:08<05:56,  1.12it/s]

Training_loss 16.68397


 80%|████████  | 1603/2000 [31:08<05:57,  1.11it/s]

Training_loss 16.67710


 80%|████████  | 1604/2000 [31:09<05:58,  1.10it/s]

Training_loss 16.68819


 80%|████████  | 1605/2000 [31:10<06:12,  1.06it/s]

Training_loss 16.66995


 80%|████████  | 1606/2000 [31:12<06:36,  1.01s/it]

Training_loss 16.68662


 80%|████████  | 1607/2000 [31:13<06:29,  1.01it/s]

Training_loss 16.68537


 80%|████████  | 1608/2000 [31:13<06:19,  1.03it/s]

Training_loss 16.68645


 80%|████████  | 1609/2000 [31:15<06:34,  1.01s/it]

Training_loss 16.67955


 80%|████████  | 1610/2000 [31:16<06:29,  1.00it/s]

Training_loss 16.71090


 81%|████████  | 1611/2000 [31:16<06:22,  1.02it/s]

Training_loss 16.76423


 81%|████████  | 1612/2000 [31:17<06:19,  1.02it/s]

Training_loss 16.79531


 81%|████████  | 1613/2000 [31:18<05:59,  1.08it/s]

Training_loss 16.82313


 81%|████████  | 1614/2000 [31:19<05:57,  1.08it/s]

Training_loss 16.85171


 81%|████████  | 1615/2000 [31:20<05:58,  1.07it/s]

Training_loss 16.85904


 81%|████████  | 1616/2000 [31:21<06:04,  1.05it/s]

Training_loss 16.84520


 81%|████████  | 1617/2000 [31:22<06:10,  1.03it/s]

Training_loss 16.83443


 81%|████████  | 1618/2000 [31:23<06:10,  1.03it/s]

Training_loss 16.85049


 81%|████████  | 1619/2000 [31:24<06:15,  1.01it/s]

Training_loss 16.83403


 81%|████████  | 1620/2000 [31:25<06:07,  1.03it/s]

Training_loss 16.82544


 81%|████████  | 1621/2000 [31:26<05:59,  1.06it/s]

Training_loss 16.80969


 81%|████████  | 1622/2000 [31:27<06:31,  1.04s/it]

Training_loss 16.80433


 81%|████████  | 1623/2000 [31:28<06:27,  1.03s/it]

Training_loss 16.81963


 81%|████████  | 1624/2000 [31:29<06:11,  1.01it/s]

Training_loss 16.83732


 81%|████████▏ | 1625/2000 [31:30<05:58,  1.04it/s]

Training_loss 16.79303


 81%|████████▏ | 1626/2000 [31:31<05:55,  1.05it/s]

Training_loss 16.77762


 81%|████████▏ | 1627/2000 [31:32<05:55,  1.05it/s]

Training_loss 16.75821


 81%|████████▏ | 1628/2000 [31:33<05:46,  1.07it/s]

Training_loss 16.79204


 81%|████████▏ | 1629/2000 [31:34<05:37,  1.10it/s]

Training_loss 16.77695


 82%|████████▏ | 1630/2000 [31:34<05:32,  1.11it/s]

Training_loss 16.77677


 82%|████████▏ | 1631/2000 [31:35<05:36,  1.10it/s]

Training_loss 16.79809


 82%|████████▏ | 1632/2000 [31:36<05:43,  1.07it/s]

Training_loss 16.80627


 82%|████████▏ | 1633/2000 [31:37<05:44,  1.07it/s]

Training_loss 16.82559


 82%|████████▏ | 1634/2000 [31:38<05:40,  1.07it/s]

Training_loss 16.81384


 82%|████████▏ | 1635/2000 [31:39<05:47,  1.05it/s]

Training_loss 16.83697


 82%|████████▏ | 1636/2000 [31:40<05:47,  1.05it/s]

Training_loss 16.84960


 82%|████████▏ | 1637/2000 [31:41<06:00,  1.01it/s]

Training_loss 16.81026


 82%|████████▏ | 1638/2000 [31:44<08:31,  1.41s/it]

Training_loss 16.82266


 82%|████████▏ | 1639/2000 [31:46<10:28,  1.74s/it]

Training_loss 16.78792


 82%|████████▏ | 1640/2000 [31:49<12:16,  2.05s/it]

Training_loss 16.79079


 82%|████████▏ | 1641/2000 [31:51<11:49,  1.98s/it]

Training_loss 16.80024


 82%|████████▏ | 1642/2000 [31:53<11:57,  2.01s/it]

Training_loss 16.78913


 82%|████████▏ | 1643/2000 [31:54<11:13,  1.89s/it]

Training_loss 16.80231


 82%|████████▏ | 1644/2000 [31:56<10:43,  1.81s/it]

Training_loss 16.78565


 82%|████████▏ | 1645/2000 [31:59<11:48,  1.99s/it]

Training_loss 16.78656


 82%|████████▏ | 1646/2000 [32:01<12:19,  2.09s/it]

Training_loss 16.74431


 82%|████████▏ | 1647/2000 [32:03<12:11,  2.07s/it]

Training_loss 16.73161


 82%|████████▏ | 1648/2000 [32:05<11:58,  2.04s/it]

Training_loss 16.73274


 82%|████████▏ | 1649/2000 [32:07<11:34,  1.98s/it]

Training_loss 16.73873


 82%|████████▎ | 1650/2000 [32:08<10:05,  1.73s/it]

Training_loss 16.77129


 83%|████████▎ | 1651/2000 [32:09<08:48,  1.51s/it]

Training_loss 16.75825


 83%|████████▎ | 1652/2000 [32:10<08:22,  1.44s/it]

Training_loss 16.77100


 83%|████████▎ | 1653/2000 [32:12<08:42,  1.51s/it]

Training_loss 16.76558


 83%|████████▎ | 1654/2000 [32:13<08:53,  1.54s/it]

Training_loss 16.75521


 83%|████████▎ | 1655/2000 [32:14<08:08,  1.42s/it]

Training_loss 16.74260


 83%|████████▎ | 1656/2000 [32:16<07:50,  1.37s/it]

Training_loss 16.75674


 83%|████████▎ | 1657/2000 [32:17<07:26,  1.30s/it]

Training_loss 16.74682


 83%|████████▎ | 1658/2000 [32:18<06:51,  1.20s/it]

Training_loss 16.76266


 83%|████████▎ | 1659/2000 [32:19<06:21,  1.12s/it]

Training_loss 16.76879


 83%|████████▎ | 1660/2000 [32:20<06:32,  1.16s/it]

Training_loss 16.71489


 83%|████████▎ | 1661/2000 [32:21<06:32,  1.16s/it]

Training_loss 16.68713


 83%|████████▎ | 1662/2000 [32:23<07:55,  1.41s/it]

Training_loss 16.67443


 83%|████████▎ | 1663/2000 [32:25<08:26,  1.50s/it]

Training_loss 16.67948


 83%|████████▎ | 1664/2000 [32:27<09:03,  1.62s/it]

Training_loss 16.68082


 83%|████████▎ | 1665/2000 [32:28<08:19,  1.49s/it]

Training_loss 16.70812


 83%|████████▎ | 1666/2000 [32:29<07:48,  1.40s/it]

Training_loss 16.73476


 83%|████████▎ | 1667/2000 [32:31<07:47,  1.40s/it]

Training_loss 16.75168


 83%|████████▎ | 1668/2000 [32:32<07:38,  1.38s/it]

Training_loss 16.75776


 83%|████████▎ | 1669/2000 [32:33<07:12,  1.31s/it]

Training_loss 16.74189


 84%|████████▎ | 1670/2000 [32:34<06:47,  1.24s/it]

Training_loss 16.73804


 84%|████████▎ | 1671/2000 [32:35<06:41,  1.22s/it]

Training_loss 16.75593


 84%|████████▎ | 1672/2000 [32:36<06:32,  1.20s/it]

Training_loss 16.76784


 84%|████████▎ | 1673/2000 [32:38<06:20,  1.16s/it]

Training_loss 16.77662


 84%|████████▎ | 1674/2000 [32:39<06:24,  1.18s/it]

Training_loss 16.76112


 84%|████████▍ | 1675/2000 [32:40<06:36,  1.22s/it]

Training_loss 16.73991


 84%|████████▍ | 1676/2000 [32:42<07:26,  1.38s/it]

Training_loss 16.70759


 84%|████████▍ | 1677/2000 [32:44<08:18,  1.54s/it]

Training_loss 16.73439


 84%|████████▍ | 1678/2000 [32:45<08:29,  1.58s/it]

Training_loss 16.76768


 84%|████████▍ | 1679/2000 [32:48<09:40,  1.81s/it]

Training_loss 16.78460


 84%|████████▍ | 1680/2000 [32:49<09:23,  1.76s/it]

Training_loss 16.78755


 84%|████████▍ | 1681/2000 [32:50<08:07,  1.53s/it]

Training_loss 16.74905


 84%|████████▍ | 1682/2000 [32:51<07:17,  1.38s/it]

Training_loss 16.73205


 84%|████████▍ | 1683/2000 [32:52<06:40,  1.26s/it]

Training_loss 16.74062


 84%|████████▍ | 1684/2000 [32:54<06:29,  1.23s/it]

Training_loss 16.73144


 84%|████████▍ | 1685/2000 [32:55<06:22,  1.21s/it]

Training_loss 16.75914


 84%|████████▍ | 1686/2000 [32:56<06:26,  1.23s/it]

Training_loss 16.74864


 84%|████████▍ | 1687/2000 [32:57<06:11,  1.19s/it]

Training_loss 16.76219


 84%|████████▍ | 1688/2000 [32:58<05:55,  1.14s/it]

Training_loss 16.78026


 84%|████████▍ | 1689/2000 [32:59<05:44,  1.11s/it]

Training_loss 16.78065


 84%|████████▍ | 1690/2000 [33:00<05:37,  1.09s/it]

Training_loss 16.78414


 85%|████████▍ | 1691/2000 [33:01<05:40,  1.10s/it]

Training_loss 16.78898


 85%|████████▍ | 1692/2000 [33:02<05:41,  1.11s/it]

Training_loss 16.80243


 85%|████████▍ | 1693/2000 [33:04<05:59,  1.17s/it]

Training_loss 16.79080


 85%|████████▍ | 1694/2000 [33:05<05:43,  1.12s/it]

Training_loss 16.79505


 85%|████████▍ | 1695/2000 [33:06<05:29,  1.08s/it]

Training_loss 16.77531


 85%|████████▍ | 1696/2000 [33:07<05:17,  1.05s/it]

Training_loss 16.77453


 85%|████████▍ | 1697/2000 [33:08<05:18,  1.05s/it]

Training_loss 16.78932


 85%|████████▍ | 1698/2000 [33:09<05:40,  1.13s/it]

Training_loss 16.79341


 85%|████████▍ | 1699/2000 [33:10<05:32,  1.10s/it]

Training_loss 16.78582


 85%|████████▌ | 1700/2000 [33:11<05:14,  1.05s/it]

Training_loss 16.78610


 85%|████████▌ | 1701/2000 [33:12<05:06,  1.02s/it]

Training_loss 16.78345


 85%|████████▌ | 1702/2000 [33:13<05:04,  1.02s/it]

Training_loss 16.75649


 85%|████████▌ | 1703/2000 [33:14<04:57,  1.00s/it]

Training_loss 16.77321


 85%|████████▌ | 1704/2000 [33:15<04:47,  1.03it/s]

Training_loss 16.77912


 85%|████████▌ | 1705/2000 [33:16<04:52,  1.01it/s]

Training_loss 16.76173


 85%|████████▌ | 1706/2000 [33:17<04:47,  1.02it/s]

Training_loss 16.76080


 85%|████████▌ | 1707/2000 [33:18<04:47,  1.02it/s]

Training_loss 16.74127


 85%|████████▌ | 1708/2000 [33:19<05:16,  1.09s/it]

Training_loss 16.74129


 85%|████████▌ | 1709/2000 [33:20<05:09,  1.06s/it]

Training_loss 16.74730


 86%|████████▌ | 1710/2000 [33:21<05:07,  1.06s/it]

Training_loss 16.72921


 86%|████████▌ | 1711/2000 [33:22<04:58,  1.03s/it]

Training_loss 16.72137


 86%|████████▌ | 1712/2000 [33:23<05:00,  1.04s/it]

Training_loss 16.68587


 86%|████████▌ | 1713/2000 [33:24<04:52,  1.02s/it]

Training_loss 16.69224


 86%|████████▌ | 1714/2000 [33:25<04:54,  1.03s/it]

Training_loss 16.67986


 86%|████████▌ | 1715/2000 [33:26<04:54,  1.03s/it]

Training_loss 16.67449


 86%|████████▌ | 1716/2000 [33:27<05:01,  1.06s/it]

Training_loss 16.68543


 86%|████████▌ | 1717/2000 [33:28<04:51,  1.03s/it]

Training_loss 16.69454


 86%|████████▌ | 1718/2000 [33:29<04:38,  1.01it/s]

Training_loss 16.66747


 86%|████████▌ | 1719/2000 [33:30<04:38,  1.01it/s]

Training_loss 16.66879


 86%|████████▌ | 1720/2000 [33:31<04:36,  1.01it/s]

Training_loss 16.65292


 86%|████████▌ | 1721/2000 [33:32<04:31,  1.03it/s]

Training_loss 16.66880


 86%|████████▌ | 1722/2000 [33:33<04:28,  1.04it/s]

Training_loss 16.67982


 86%|████████▌ | 1723/2000 [33:34<04:29,  1.03it/s]

Training_loss 16.68165


 86%|████████▌ | 1724/2000 [33:36<04:55,  1.07s/it]

Training_loss 16.68974


 86%|████████▋ | 1725/2000 [33:37<04:50,  1.06s/it]

Training_loss 16.67162


 86%|████████▋ | 1726/2000 [33:37<04:41,  1.03s/it]

Training_loss 16.66576


 86%|████████▋ | 1727/2000 [33:38<04:35,  1.01s/it]

Training_loss 16.68343


 86%|████████▋ | 1728/2000 [33:39<04:26,  1.02it/s]

Training_loss 16.68702


 86%|████████▋ | 1729/2000 [33:40<04:30,  1.00it/s]

Training_loss 16.69904


 86%|████████▋ | 1730/2000 [33:41<04:25,  1.02it/s]

Training_loss 16.72824


 87%|████████▋ | 1731/2000 [33:42<04:25,  1.01it/s]

Training_loss 16.72594


 87%|████████▋ | 1732/2000 [33:43<04:27,  1.00it/s]

Training_loss 16.75051


 87%|████████▋ | 1733/2000 [33:44<04:19,  1.03it/s]

Training_loss 16.77535


 87%|████████▋ | 1734/2000 [33:45<04:23,  1.01it/s]

Training_loss 16.80150


 87%|████████▋ | 1735/2000 [33:46<04:18,  1.02it/s]

Training_loss 16.79991


 87%|████████▋ | 1736/2000 [33:47<04:19,  1.02it/s]

Training_loss 16.79622


 87%|████████▋ | 1737/2000 [33:48<04:13,  1.04it/s]

Training_loss 16.79269


 87%|████████▋ | 1738/2000 [33:49<04:08,  1.06it/s]

Training_loss 16.80919


 87%|████████▋ | 1739/2000 [33:50<04:16,  1.02it/s]

Training_loss 16.80774


 87%|████████▋ | 1740/2000 [33:51<04:40,  1.08s/it]

Training_loss 16.79222


 87%|████████▋ | 1741/2000 [33:52<04:33,  1.05s/it]

Training_loss 16.82100


 87%|████████▋ | 1742/2000 [33:53<04:28,  1.04s/it]

Training_loss 16.81454


 87%|████████▋ | 1743/2000 [33:55<04:26,  1.04s/it]

Training_loss 16.78781


 87%|████████▋ | 1744/2000 [33:55<04:22,  1.02s/it]

Training_loss 16.78280


 87%|████████▋ | 1745/2000 [33:57<04:22,  1.03s/it]

Training_loss 16.76967


 87%|████████▋ | 1746/2000 [33:57<04:11,  1.01it/s]

Training_loss 16.76069


 87%|████████▋ | 1747/2000 [33:58<04:08,  1.02it/s]

Training_loss 16.76896


 87%|████████▋ | 1748/2000 [33:59<04:02,  1.04it/s]

Training_loss 16.78227


 87%|████████▋ | 1749/2000 [34:00<04:10,  1.00it/s]

Training_loss 16.77173


 88%|████████▊ | 1750/2000 [34:01<04:01,  1.04it/s]

Training_loss 16.77181


 88%|████████▊ | 1751/2000 [34:02<04:00,  1.04it/s]

Training_loss 16.77390


 88%|████████▊ | 1752/2000 [34:03<04:01,  1.02it/s]

Training_loss 16.77370


 88%|████████▊ | 1753/2000 [34:04<03:59,  1.03it/s]

Training_loss 16.75531


 88%|████████▊ | 1754/2000 [34:05<03:59,  1.03it/s]

Training_loss 16.76917


 88%|████████▊ | 1755/2000 [34:06<04:07,  1.01s/it]

Training_loss 16.78439


 88%|████████▊ | 1756/2000 [34:07<04:20,  1.07s/it]

Training_loss 16.78892


 88%|████████▊ | 1757/2000 [34:09<04:34,  1.13s/it]

Training_loss 16.81233


 88%|████████▊ | 1758/2000 [34:10<04:33,  1.13s/it]

Training_loss 16.81153


 88%|████████▊ | 1759/2000 [34:11<04:33,  1.13s/it]

Training_loss 16.83554


 88%|████████▊ | 1760/2000 [34:12<04:25,  1.11s/it]

Training_loss 16.85621


 88%|████████▊ | 1761/2000 [34:13<04:09,  1.04s/it]

Training_loss 16.84786


 88%|████████▊ | 1762/2000 [34:14<04:01,  1.02s/it]

Training_loss 16.82007


 88%|████████▊ | 1763/2000 [34:15<03:54,  1.01it/s]

Training_loss 16.82264


 88%|████████▊ | 1764/2000 [34:16<03:50,  1.03it/s]

Training_loss 16.83033


 88%|████████▊ | 1765/2000 [34:17<03:45,  1.04it/s]

Training_loss 16.85331


 88%|████████▊ | 1766/2000 [34:18<03:43,  1.05it/s]

Training_loss 16.89352


 88%|████████▊ | 1767/2000 [34:19<03:45,  1.03it/s]

Training_loss 16.92076


 88%|████████▊ | 1768/2000 [34:20<04:03,  1.05s/it]

Training_loss 16.92790


 88%|████████▊ | 1769/2000 [34:21<04:07,  1.07s/it]

Training_loss 16.96785


 88%|████████▊ | 1770/2000 [34:22<03:59,  1.04s/it]

Training_loss 16.98722


 89%|████████▊ | 1771/2000 [34:23<04:20,  1.14s/it]

Training_loss 16.96916


 89%|████████▊ | 1772/2000 [34:25<04:32,  1.20s/it]

Training_loss 16.98372


 89%|████████▊ | 1773/2000 [34:26<04:27,  1.18s/it]

Training_loss 17.01750


 89%|████████▊ | 1774/2000 [34:27<04:25,  1.18s/it]

Training_loss 17.00988


 89%|████████▉ | 1775/2000 [34:28<04:22,  1.17s/it]

Training_loss 17.03098


 89%|████████▉ | 1776/2000 [34:29<04:06,  1.10s/it]

Training_loss 17.01653


 89%|████████▉ | 1777/2000 [34:30<04:07,  1.11s/it]

Training_loss 16.99268


 89%|████████▉ | 1778/2000 [34:31<04:09,  1.12s/it]

Training_loss 16.96655


 89%|████████▉ | 1779/2000 [34:33<04:20,  1.18s/it]

Training_loss 16.98532


 89%|████████▉ | 1780/2000 [34:34<04:21,  1.19s/it]

Training_loss 16.98448


 89%|████████▉ | 1781/2000 [34:35<04:11,  1.15s/it]

Training_loss 17.00505


 89%|████████▉ | 1782/2000 [34:36<03:55,  1.08s/it]

Training_loss 17.00826


 89%|████████▉ | 1783/2000 [34:37<03:47,  1.05s/it]

Training_loss 17.01745


 89%|████████▉ | 1784/2000 [34:38<03:38,  1.01s/it]

Training_loss 17.02118


 89%|████████▉ | 1785/2000 [34:39<03:55,  1.09s/it]

Training_loss 17.03395


 89%|████████▉ | 1786/2000 [34:40<03:50,  1.08s/it]

Training_loss 17.03719


 89%|████████▉ | 1787/2000 [34:41<03:49,  1.08s/it]

Training_loss 17.01781


 89%|████████▉ | 1788/2000 [34:42<03:53,  1.10s/it]

Training_loss 17.00965


 89%|████████▉ | 1789/2000 [34:43<03:51,  1.10s/it]

Training_loss 16.99978


 90%|████████▉ | 1790/2000 [34:45<03:53,  1.11s/it]

Training_loss 17.02530


 90%|████████▉ | 1791/2000 [34:46<03:48,  1.10s/it]

Training_loss 16.98941


 90%|████████▉ | 1792/2000 [34:47<03:39,  1.05s/it]

Training_loss 16.99765


 90%|████████▉ | 1793/2000 [34:48<03:31,  1.02s/it]

Training_loss 16.99954


 90%|████████▉ | 1794/2000 [34:48<03:23,  1.01it/s]

Training_loss 16.99352


 90%|████████▉ | 1795/2000 [34:49<03:20,  1.02it/s]

Training_loss 16.95472


 90%|████████▉ | 1796/2000 [34:50<03:20,  1.02it/s]

Training_loss 16.94904


 90%|████████▉ | 1797/2000 [34:51<03:14,  1.04it/s]

Training_loss 16.95323


 90%|████████▉ | 1798/2000 [34:52<03:12,  1.05it/s]

Training_loss 16.98126


 90%|████████▉ | 1799/2000 [34:53<03:12,  1.05it/s]

Training_loss 16.99403


 90%|█████████ | 1800/2000 [34:54<03:19,  1.00it/s]

Training_loss 16.96344


 90%|█████████ | 1801/2000 [34:55<03:31,  1.06s/it]

Training_loss 16.93061


 90%|█████████ | 1802/2000 [34:56<03:26,  1.04s/it]

Training_loss 16.94907


 90%|█████████ | 1803/2000 [34:57<03:18,  1.01s/it]

Training_loss 16.96498


 90%|█████████ | 1804/2000 [34:58<03:19,  1.02s/it]

Training_loss 16.98721


 90%|█████████ | 1805/2000 [34:59<03:18,  1.02s/it]

Training_loss 16.98630


 90%|█████████ | 1806/2000 [35:01<03:20,  1.03s/it]

Training_loss 16.94763


 90%|█████████ | 1807/2000 [35:01<03:13,  1.00s/it]

Training_loss 16.96804


 90%|█████████ | 1808/2000 [35:02<03:06,  1.03it/s]

Training_loss 16.98283


 90%|█████████ | 1809/2000 [35:03<03:06,  1.02it/s]

Training_loss 16.97912


 90%|█████████ | 1810/2000 [35:04<03:03,  1.03it/s]

Training_loss 17.03752


 91%|█████████ | 1811/2000 [35:05<03:04,  1.02it/s]

Training_loss 17.02474


 91%|█████████ | 1812/2000 [35:06<03:04,  1.02it/s]

Training_loss 17.03354


 91%|█████████ | 1813/2000 [35:07<03:00,  1.03it/s]

Training_loss 16.98841


 91%|█████████ | 1814/2000 [35:08<03:01,  1.03it/s]

Training_loss 16.96509


 91%|█████████ | 1815/2000 [35:09<02:56,  1.05it/s]

Training_loss 16.96779


 91%|█████████ | 1816/2000 [35:10<02:59,  1.03it/s]

Training_loss 16.95454


 91%|█████████ | 1817/2000 [35:11<03:12,  1.05s/it]

Training_loss 16.97075


 91%|█████████ | 1818/2000 [35:12<03:06,  1.03s/it]

Training_loss 16.96646


 91%|█████████ | 1819/2000 [35:13<03:04,  1.02s/it]

Training_loss 16.99336


 91%|█████████ | 1820/2000 [35:14<02:54,  1.03it/s]

Training_loss 16.98335


 91%|█████████ | 1821/2000 [35:15<02:53,  1.03it/s]

Training_loss 16.99747


 91%|█████████ | 1822/2000 [35:16<02:48,  1.05it/s]

Training_loss 16.98659


 91%|█████████ | 1823/2000 [35:17<02:48,  1.05it/s]

Training_loss 17.01566


 91%|█████████ | 1824/2000 [35:18<03:00,  1.02s/it]

Training_loss 17.01937


 91%|█████████▏| 1825/2000 [35:19<02:58,  1.02s/it]

Training_loss 17.02592


 91%|█████████▏| 1826/2000 [35:20<02:59,  1.03s/it]

Training_loss 17.03105


 91%|█████████▏| 1827/2000 [35:21<02:51,  1.01it/s]

Training_loss 17.02741


 91%|█████████▏| 1828/2000 [35:22<02:48,  1.02it/s]

Training_loss 17.03831


 91%|█████████▏| 1829/2000 [35:23<02:48,  1.02it/s]

Training_loss 17.06545


 92%|█████████▏| 1830/2000 [35:24<02:44,  1.03it/s]

Training_loss 17.08361


 92%|█████████▏| 1831/2000 [35:25<02:50,  1.01s/it]

Training_loss 17.07001


 92%|█████████▏| 1832/2000 [35:26<02:52,  1.03s/it]

Training_loss 17.07307


 92%|█████████▏| 1833/2000 [35:28<03:04,  1.11s/it]

Training_loss 17.04537


 92%|█████████▏| 1834/2000 [35:29<02:57,  1.07s/it]

Training_loss 17.04962


 92%|█████████▏| 1835/2000 [35:30<02:53,  1.05s/it]

Training_loss 17.06052


 92%|█████████▏| 1836/2000 [35:30<02:45,  1.01s/it]

Training_loss 17.06407


 92%|█████████▏| 1837/2000 [35:31<02:40,  1.01it/s]

Training_loss 17.09804


 92%|█████████▏| 1838/2000 [35:32<02:40,  1.01it/s]

Training_loss 17.07620


 92%|█████████▏| 1839/2000 [35:33<02:36,  1.03it/s]

Training_loss 17.02641


 92%|█████████▏| 1840/2000 [35:34<02:31,  1.05it/s]

Training_loss 17.04465


 92%|█████████▏| 1841/2000 [35:35<02:30,  1.06it/s]

Training_loss 17.03696


 92%|█████████▏| 1842/2000 [35:36<02:28,  1.06it/s]

Training_loss 17.05034


 92%|█████████▏| 1843/2000 [35:37<02:30,  1.04it/s]

Training_loss 17.07248


 92%|█████████▏| 1844/2000 [35:38<02:39,  1.02s/it]

Training_loss 17.07228


 92%|█████████▏| 1845/2000 [35:40<02:55,  1.13s/it]

Training_loss 17.06627


 92%|█████████▏| 1846/2000 [35:41<02:56,  1.15s/it]

Training_loss 17.05410


 92%|█████████▏| 1847/2000 [35:42<02:48,  1.10s/it]

Training_loss 17.04046


 92%|█████████▏| 1848/2000 [35:43<02:53,  1.14s/it]

Training_loss 17.02885


 92%|█████████▏| 1849/2000 [35:44<02:49,  1.12s/it]

Training_loss 17.01681


 92%|█████████▎| 1850/2000 [35:45<02:43,  1.09s/it]

Training_loss 17.02987


 93%|█████████▎| 1851/2000 [35:46<02:39,  1.07s/it]

Training_loss 17.01278


 93%|█████████▎| 1852/2000 [35:47<02:38,  1.07s/it]

Training_loss 17.04119


 93%|█████████▎| 1853/2000 [35:48<02:31,  1.03s/it]

Training_loss 17.00782


 93%|█████████▎| 1854/2000 [35:49<02:26,  1.01s/it]

Training_loss 16.98691


 93%|█████████▎| 1855/2000 [35:50<02:25,  1.00s/it]

Training_loss 16.96147


 93%|█████████▎| 1856/2000 [35:51<02:22,  1.01it/s]

Training_loss 16.95114


 93%|█████████▎| 1857/2000 [35:52<02:20,  1.02it/s]

Training_loss 16.95136


 93%|█████████▎| 1858/2000 [35:53<02:19,  1.02it/s]

Training_loss 16.93996


 93%|█████████▎| 1859/2000 [35:54<02:17,  1.03it/s]

Training_loss 16.97177


 93%|█████████▎| 1860/2000 [35:55<02:16,  1.02it/s]

Training_loss 16.97891


 93%|█████████▎| 1861/2000 [35:56<02:14,  1.04it/s]

Training_loss 16.99211


 93%|█████████▎| 1862/2000 [35:57<02:11,  1.05it/s]

Training_loss 16.95505


 93%|█████████▎| 1863/2000 [35:58<02:07,  1.07it/s]

Training_loss 16.96410


 93%|█████████▎| 1864/2000 [35:59<02:18,  1.02s/it]

Training_loss 16.98444


 93%|█████████▎| 1865/2000 [36:00<02:28,  1.10s/it]

Training_loss 16.99176


 93%|█████████▎| 1866/2000 [36:01<02:24,  1.08s/it]

Training_loss 17.00663


 93%|█████████▎| 1867/2000 [36:02<02:19,  1.05s/it]

Training_loss 17.01307


 93%|█████████▎| 1868/2000 [36:03<02:15,  1.02s/it]

Training_loss 17.03187


 93%|█████████▎| 1869/2000 [36:04<02:10,  1.00it/s]

Training_loss 17.04893


 94%|█████████▎| 1870/2000 [36:05<02:06,  1.02it/s]

Training_loss 17.04087


 94%|█████████▎| 1871/2000 [36:06<02:02,  1.06it/s]

Training_loss 17.02912


 94%|█████████▎| 1872/2000 [36:07<02:00,  1.06it/s]

Training_loss 17.07836


 94%|█████████▎| 1873/2000 [36:08<01:58,  1.07it/s]

Training_loss 17.09407


 94%|█████████▎| 1874/2000 [36:09<02:06,  1.01s/it]

Training_loss 17.09933


 94%|█████████▍| 1875/2000 [36:10<02:13,  1.07s/it]

Training_loss 17.15738


 94%|█████████▍| 1876/2000 [36:11<02:10,  1.05s/it]

Training_loss 17.13804


 94%|█████████▍| 1877/2000 [36:12<02:05,  1.02s/it]

Training_loss 17.13394


 94%|█████████▍| 1878/2000 [36:13<02:03,  1.01s/it]

Training_loss 17.13140


 94%|█████████▍| 1879/2000 [36:14<02:14,  1.11s/it]

Training_loss 17.11660


 94%|█████████▍| 1880/2000 [36:16<02:22,  1.18s/it]

Training_loss 17.09341


 94%|█████████▍| 1881/2000 [36:17<02:10,  1.09s/it]

Training_loss 17.09393


 94%|█████████▍| 1882/2000 [36:18<02:04,  1.06s/it]

Training_loss 17.11074


 94%|█████████▍| 1883/2000 [36:19<01:58,  1.01s/it]

Training_loss 17.11079


 94%|█████████▍| 1884/2000 [36:20<01:55,  1.01it/s]

Training_loss 17.09947


 94%|█████████▍| 1885/2000 [36:21<01:53,  1.01it/s]

Training_loss 17.09462


 94%|█████████▍| 1886/2000 [36:21<01:49,  1.04it/s]

Training_loss 17.09874


 94%|█████████▍| 1887/2000 [36:22<01:52,  1.01it/s]

Training_loss 17.08533


 94%|█████████▍| 1888/2000 [36:23<01:50,  1.02it/s]

Training_loss 17.08647


 94%|█████████▍| 1889/2000 [36:24<01:48,  1.02it/s]

Training_loss 17.07769


 94%|█████████▍| 1890/2000 [36:25<01:49,  1.00it/s]

Training_loss 17.07390


 95%|█████████▍| 1891/2000 [36:26<01:50,  1.01s/it]

Training_loss 17.10643


 95%|█████████▍| 1892/2000 [36:28<01:49,  1.01s/it]

Training_loss 17.14234


 95%|█████████▍| 1893/2000 [36:29<01:49,  1.02s/it]

Training_loss 17.13505


 95%|█████████▍| 1894/2000 [36:29<01:45,  1.00it/s]

Training_loss 17.10762


 95%|█████████▍| 1895/2000 [36:31<01:46,  1.02s/it]

Training_loss 17.11727


 95%|█████████▍| 1896/2000 [36:32<01:51,  1.07s/it]

Training_loss 17.12384


 95%|█████████▍| 1897/2000 [36:33<01:49,  1.06s/it]

Training_loss 17.12935


 95%|█████████▍| 1898/2000 [36:34<01:45,  1.03s/it]

Training_loss 17.13862


 95%|█████████▍| 1899/2000 [36:35<01:43,  1.02s/it]

Training_loss 17.14757


 95%|█████████▌| 1900/2000 [36:36<01:39,  1.01it/s]

Training_loss 17.16011


 95%|█████████▌| 1901/2000 [36:37<01:39,  1.01s/it]

Training_loss 17.18053


 95%|█████████▌| 1902/2000 [36:38<01:36,  1.02it/s]

Training_loss 17.15308


 95%|█████████▌| 1903/2000 [36:39<01:35,  1.01it/s]

Training_loss 17.12526


 95%|█████████▌| 1904/2000 [36:40<01:32,  1.04it/s]

Training_loss 17.13110


 95%|█████████▌| 1905/2000 [36:41<01:33,  1.01it/s]

Training_loss 17.15074


 95%|█████████▌| 1906/2000 [36:42<01:32,  1.01it/s]

Training_loss 17.11867


 95%|█████████▌| 1907/2000 [36:43<01:32,  1.00it/s]

Training_loss 17.11817


 95%|█████████▌| 1908/2000 [36:44<01:31,  1.01it/s]

Training_loss 17.14493


 95%|█████████▌| 1909/2000 [36:45<01:28,  1.02it/s]

Training_loss 17.15508


 96%|█████████▌| 1910/2000 [36:45<01:26,  1.04it/s]

Training_loss 17.13747


 96%|█████████▌| 1911/2000 [36:47<01:30,  1.02s/it]

Training_loss 17.15438


 96%|█████████▌| 1912/2000 [36:48<01:33,  1.06s/it]

Training_loss 17.13136


 96%|█████████▌| 1913/2000 [36:49<01:30,  1.04s/it]

Training_loss 17.12056


 96%|█████████▌| 1914/2000 [36:50<01:26,  1.01s/it]

Training_loss 17.15589


 96%|█████████▌| 1915/2000 [36:51<01:24,  1.00it/s]

Training_loss 17.13878


 96%|█████████▌| 1916/2000 [36:52<01:23,  1.00it/s]

Training_loss 17.17232


 96%|█████████▌| 1917/2000 [36:53<01:22,  1.00it/s]

Training_loss 17.21670


 96%|█████████▌| 1918/2000 [36:54<01:20,  1.02it/s]

Training_loss 17.23742


 96%|█████████▌| 1919/2000 [36:55<01:20,  1.01it/s]

Training_loss 17.24537


 96%|█████████▌| 1920/2000 [36:56<01:19,  1.01it/s]

Training_loss 17.23737


 96%|█████████▌| 1921/2000 [36:56<01:15,  1.04it/s]

Training_loss 17.23889


 96%|█████████▌| 1922/2000 [36:57<01:15,  1.04it/s]

Training_loss 17.23778


 96%|█████████▌| 1923/2000 [36:59<01:16,  1.01it/s]

Training_loss 17.25027


 96%|█████████▌| 1924/2000 [37:00<01:15,  1.00it/s]

Training_loss 17.24371


 96%|█████████▋| 1925/2000 [37:01<01:15,  1.00s/it]

Training_loss 17.23425


 96%|█████████▋| 1926/2000 [37:02<01:14,  1.01s/it]

Training_loss 17.21554


 96%|█████████▋| 1927/2000 [37:03<01:19,  1.09s/it]

Training_loss 17.20053


 96%|█████████▋| 1928/2000 [37:04<01:19,  1.11s/it]

Training_loss 17.21889


 96%|█████████▋| 1929/2000 [37:05<01:14,  1.04s/it]

Training_loss 17.22099


 96%|█████████▋| 1930/2000 [37:06<01:12,  1.04s/it]

Training_loss 17.25257


 97%|█████████▋| 1931/2000 [37:07<01:10,  1.02s/it]

Training_loss 17.25787


 97%|█████████▋| 1932/2000 [37:08<01:13,  1.09s/it]

Training_loss 17.23760


 97%|█████████▋| 1933/2000 [37:10<01:20,  1.20s/it]

Training_loss 17.24697


 97%|█████████▋| 1934/2000 [37:11<01:18,  1.19s/it]

Training_loss 17.21367


 97%|█████████▋| 1935/2000 [37:12<01:11,  1.10s/it]

Training_loss 17.22424


 97%|█████████▋| 1936/2000 [37:13<01:07,  1.06s/it]

Training_loss 17.25015


 97%|█████████▋| 1937/2000 [37:14<01:06,  1.05s/it]

Training_loss 17.26441


 97%|█████████▋| 1938/2000 [37:15<01:03,  1.02s/it]

Training_loss 17.28370


 97%|█████████▋| 1939/2000 [37:16<01:00,  1.00it/s]

Training_loss 17.26894


 97%|█████████▋| 1940/2000 [37:17<00:59,  1.01it/s]

Training_loss 17.29349


 97%|█████████▋| 1941/2000 [37:18<00:58,  1.01it/s]

Training_loss 17.30344


 97%|█████████▋| 1942/2000 [37:19<00:59,  1.03s/it]

Training_loss 17.29336


 97%|█████████▋| 1943/2000 [37:20<01:01,  1.08s/it]

Training_loss 17.30363


 97%|█████████▋| 1944/2000 [37:21<00:58,  1.05s/it]

Training_loss 17.30204


 97%|█████████▋| 1945/2000 [37:22<00:56,  1.02s/it]

Training_loss 17.28453


 97%|█████████▋| 1946/2000 [37:23<00:53,  1.00it/s]

Training_loss 17.28246


 97%|█████████▋| 1947/2000 [37:24<00:53,  1.00s/it]

Training_loss 17.25543


 97%|█████████▋| 1948/2000 [37:25<00:51,  1.02it/s]

Training_loss 17.26983


 97%|█████████▋| 1949/2000 [37:26<00:51,  1.00s/it]

Training_loss 17.27367


 98%|█████████▊| 1950/2000 [37:27<00:49,  1.01it/s]

Training_loss 17.30663


 98%|█████████▊| 1951/2000 [37:28<00:47,  1.02it/s]

Training_loss 17.34243


 98%|█████████▊| 1952/2000 [37:29<00:46,  1.03it/s]

Training_loss 17.35112


 98%|█████████▊| 1953/2000 [37:29<00:44,  1.05it/s]

Training_loss 17.36936


 98%|█████████▊| 1954/2000 [37:30<00:43,  1.05it/s]

Training_loss 17.38797


 98%|█████████▊| 1955/2000 [37:31<00:42,  1.05it/s]

Training_loss 17.41278


 98%|█████████▊| 1956/2000 [37:32<00:41,  1.06it/s]

Training_loss 17.41845


 98%|█████████▊| 1957/2000 [37:33<00:40,  1.06it/s]

Training_loss 17.43769


 98%|█████████▊| 1958/2000 [37:34<00:40,  1.03it/s]

Training_loss 17.41505


 98%|█████████▊| 1959/2000 [37:35<00:42,  1.03s/it]

Training_loss 17.39943


 98%|█████████▊| 1960/2000 [37:36<00:40,  1.02s/it]

Training_loss 17.41097


 98%|█████████▊| 1961/2000 [37:37<00:38,  1.01it/s]

Training_loss 17.40265


 98%|█████████▊| 1962/2000 [37:38<00:37,  1.01it/s]

Training_loss 17.38234


 98%|█████████▊| 1963/2000 [37:39<00:36,  1.03it/s]

Training_loss 17.37441


 98%|█████████▊| 1964/2000 [37:40<00:34,  1.04it/s]

Training_loss 17.36229


 98%|█████████▊| 1965/2000 [37:41<00:33,  1.04it/s]

Training_loss 17.36627


 98%|█████████▊| 1966/2000 [37:42<00:33,  1.02it/s]

Training_loss 17.32399


 98%|█████████▊| 1967/2000 [37:43<00:31,  1.04it/s]

Training_loss 17.35012


 98%|█████████▊| 1968/2000 [37:44<00:31,  1.01it/s]

Training_loss 17.34080


 98%|█████████▊| 1969/2000 [37:45<00:30,  1.03it/s]

Training_loss 17.34938


 98%|█████████▊| 1970/2000 [37:46<00:29,  1.01it/s]

Training_loss 17.34490


 99%|█████████▊| 1971/2000 [37:47<00:28,  1.02it/s]

Training_loss 17.33581


 99%|█████████▊| 1972/2000 [37:48<00:27,  1.03it/s]

Training_loss 17.36223


 99%|█████████▊| 1973/2000 [37:49<00:26,  1.01it/s]

Training_loss 17.38421


 99%|█████████▊| 1974/2000 [37:50<00:26,  1.02s/it]

Training_loss 17.37763


 99%|█████████▉| 1975/2000 [37:51<00:27,  1.09s/it]

Training_loss 17.37868


 99%|█████████▉| 1976/2000 [37:52<00:25,  1.08s/it]

Training_loss 17.38731


 99%|█████████▉| 1977/2000 [37:53<00:23,  1.03s/it]

Training_loss 17.37189


 99%|█████████▉| 1978/2000 [37:54<00:22,  1.02s/it]

Training_loss 17.41590


 99%|█████████▉| 1979/2000 [37:55<00:21,  1.02s/it]

Training_loss 17.43715


 99%|█████████▉| 1980/2000 [37:57<00:21,  1.07s/it]

Training_loss 17.49520


 99%|█████████▉| 1981/2000 [37:58<00:20,  1.10s/it]

Training_loss 17.46112


 99%|█████████▉| 1982/2000 [37:59<00:19,  1.09s/it]

Training_loss 17.45845


 99%|█████████▉| 1983/2000 [38:00<00:18,  1.07s/it]

Training_loss 17.42138


 99%|█████████▉| 1984/2000 [38:01<00:16,  1.04s/it]

Training_loss 17.42276


 99%|█████████▉| 1985/2000 [38:02<00:15,  1.03s/it]

Training_loss 17.44709


 99%|█████████▉| 1986/2000 [38:03<00:14,  1.02s/it]

Training_loss 17.43583


 99%|█████████▉| 1987/2000 [38:04<00:12,  1.01it/s]

Training_loss 17.46478


 99%|█████████▉| 1988/2000 [38:05<00:11,  1.01it/s]

Training_loss 17.45644


 99%|█████████▉| 1989/2000 [38:06<00:10,  1.02it/s]

Training_loss 17.47097


100%|█████████▉| 1990/2000 [38:07<00:10,  1.05s/it]

Training_loss 17.46410


100%|█████████▉| 1991/2000 [38:08<00:09,  1.10s/it]

Training_loss 17.46702


100%|█████████▉| 1992/2000 [38:09<00:09,  1.14s/it]

Training_loss 17.45053


100%|█████████▉| 1993/2000 [38:11<00:08,  1.21s/it]

Training_loss 17.43745


100%|█████████▉| 1994/2000 [38:12<00:07,  1.21s/it]

Training_loss 17.43883


100%|█████████▉| 1995/2000 [38:13<00:06,  1.23s/it]

Training_loss 17.44409


100%|█████████▉| 1996/2000 [38:14<00:04,  1.17s/it]

Training_loss 17.47583


100%|█████████▉| 1997/2000 [38:15<00:03,  1.10s/it]

Training_loss 17.47299


100%|█████████▉| 1998/2000 [38:16<00:02,  1.08s/it]

Training_loss 17.46654


100%|█████████▉| 1999/2000 [38:17<00:01,  1.05s/it]

Training_loss 17.49249


100%|██████████| 2000/2000 [38:18<00:00,  1.15s/it]

Training_loss 17.48088





In [21]:
#plot.plot(test_loss)
parameters_to_vector(models[19].parameters())

tensor([ 1.3357, -0.2490,  0.2949, -0.2663, -0.1279, -0.5051,  0.1018,  1.0945,
        -0.0629,  0.3944, -0.0255, -0.4312, -0.0193,  0.0173, -0.4775,  0.1955,
         0.0625,  0.8109,  0.0956,  0.5114, -0.4474, -0.0162,  0.6522,  0.1528,
         0.6010,  0.2775, -0.2008, -0.7141,  1.4424, -0.3200,  0.4103,  0.1399,
        -0.5005,  0.3276,  0.6776, -0.4306,  2.0459,  1.6550,  1.5424,  2.2753],
       grad_fn=<CatBackward0>)

In [22]:
for j in G.neighbors(0):
    print(j)

1
3
6
7
9


In [23]:
parameters_to_vector(models[0].parameters())

tensor([-7.7848e-01, -4.9740e-02, -4.7448e-02,  2.5144e-01,  1.7854e-01,
        -1.0460e-02,  3.1356e-01,  9.2443e-01,  4.4179e-01,  2.8600e-01,
        -1.5036e+00,  2.6853e-01, -5.0094e-01, -1.8730e-03, -3.8847e-01,
         2.9809e-01,  3.3728e-01, -5.2183e-01,  1.5949e-01, -9.5113e-01,
         2.2437e-01,  9.8373e-02,  4.3427e-01, -8.1958e-02,  4.4021e-01,
         2.9156e-01, -8.8687e-02, -1.1657e+00,  3.6460e-01,  1.6471e-01,
        -1.1556e-01, -1.3498e-01, -5.2556e-01, -5.7021e-02,  7.5317e-01,
         6.6770e-02,  1.7626e+00,  2.2856e+00,  1.7807e+00,  1.9342e+00],
       grad_fn=<CatBackward0>)

In [24]:
projection_list[0]

[0,
 tensor([[-0.3819,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000, -0.3819,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000, -0.3819,  ...,  0.0000,  0.0000,  0.0000],
         ...,
         [ 0.0000,  0.0000,  0.0000,  ..., -0.3819,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000, -0.3819,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000, -0.3819]]),
 0,
 tensor([[-0.6528,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000, -0.6528,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000, -0.6528,  ...,  0.0000,  0.0000,  0.0000],
         ...,
         [ 0.0000,  0.0000,  0.0000,  ..., -0.6528,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000, -0.6528,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000, -0.6528]]),
 0,
 0,
 tensor([[1.0142, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 1.0142, 0.0000,  

In [25]:
projected_weights[0]

[0,
 tensor([ 0.0303, -0.1080,  0.0138, -0.0288,  0.0211,  0.0477, -0.0440, -0.0666,
          0.0580, -0.0632,  0.0655,  0.0373, -0.0216, -0.1134,  0.0888, -0.0439,
          0.0187, -0.0550, -0.0065,  0.0025,  0.0670,  0.0477,  0.0694,  0.0833,
         -0.0423, -0.0824, -0.0024,  0.0939,  0.0493,  0.1214,  0.0964, -0.0048,
          0.1188,  0.1245,  0.0578, -0.0829,  0.0345,  0.0458, -0.0358,  0.0205]),
 0,
 tensor([-0.0053,  0.0014, -0.1238, -0.1032, -0.0332,  0.1639, -0.1976, -0.0771,
         -0.0167,  0.1392,  0.0956,  0.1072,  0.0241, -0.0748,  0.1011,  0.0490,
          0.0900, -0.2000, -0.1378, -0.1166, -0.0238,  0.2148,  0.0611,  0.0522,
          0.0977,  0.0308,  0.1032, -0.1950, -0.1074,  0.0938,  0.1015, -0.1574,
         -0.0990,  0.1728, -0.2150,  0.1001,  0.3163,  0.2967,  0.3190, -0.0799]),
 0,
 0,
 tensor([-0.2044,  0.1461, -0.1306,  0.1281,  0.2286,  0.0240,  0.2531, -0.1396,
         -0.0390,  0.2523, -0.1699, -0.2225, -0.1613,  0.3110, -0.0914,  0.2371,
        

In [26]:
test_loss = np.array(test_loss)
total_rel_error = np.array(total_rel_error)

In [27]:
np.save( 'training_loss_dfedu' + str(eta).replace('.', '_') + '_pout' + str(pout).replace('.', '_'), test_loss)
#np.save('relative_error_dfedu' + str(eta).replace('.', '_'), total_rel_error)