In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Dataset
import copy
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import networkx as nx
from torch.nn.utils import parameters_to_vector, vector_to_parameters

In [2]:
# Graph implementation
def generate_graph(cluster_sizes=[100,100], pin=0.5, pout=0.01, seed=0):
    """Generate a random connected graph"""
    probs = np.array([[pin, pout],[pout, pin]])
    while True:
        g = nx.stochastic_block_model(cluster_sizes, probs)
        if nx.algorithms.components.is_connected(g):
            return g


cluster_sizes = [10, 10]
pin = 0.5
pout = 0.2
seed = 0
alpha = 1e-3
lamda = 1e-3
eta = 1e-2
mu = 1e-2
no_users = sum(cluster_sizes)
batch_size = 50
epochs = 1
it = 2000
G = generate_graph(cluster_sizes, pin, pout, seed)

#nx.draw(G, with_labels=True, node_size=100, alpha=1, linewidths=10)
#plt.show()

In [3]:
# Metropolis weights 
number_nodes = G.number_of_nodes()
weights = np.zeros([number_nodes, number_nodes])
for edge in G.edges():
  i, j = edge[0], edge[1]
  weights[i - 1][j - 1] = 1 / (1 + np.max([G.degree(i), G.degree(j)]))
  weights[j - 1][i - 1] = weights[i - 1][j - 1]

print(weights)

weights = weights + np.diag(1 - np.sum(weights, axis=0))

metropolis_weights = weights
print(metropolis_weights)


[[0.         0.125      0.         0.11111111 0.         0.
  0.14285714 0.14285714 0.         0.         0.         0.
  0.         0.         0.         0.125      0.         0.
  0.         0.14285714]
 [0.125      0.         0.125      0.11111111 0.         0.125
  0.         0.125      0.125      0.         0.         0.125
  0.         0.         0.         0.         0.         0.
  0.         0.        ]
 [0.         0.125      0.         0.         0.         0.14285714
  0.         0.         0.         0.11111111 0.11111111 0.
  0.16666667 0.         0.         0.         0.         0.
  0.         0.        ]
 [0.11111111 0.11111111 0.         0.         0.11111111 0.11111111
  0.         0.11111111 0.11111111 0.11111111 0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.11111111]
 [0.         0.         0.         0.11111111 0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.1

In [4]:
def degrees(A):
    """Return the degrees of each node of a graph from its adjacency matrix"""
    return np.sum(A, axis=0).reshape(A.shape[0], 1)

def node_degree(n, G):
    cnt = 0
    for i in G.neighbors(n):
        cnt += 1
    return cnt

def get_neighbors(n, G):
    neighbors_list = []
    for i in G.neighbors(n):
        neighbors_list.append(int(i))
    return neighbors_list

In [5]:
degree_list = [G.degree(i) for i in range(no_users)]
print(degree_list)

for i in G.neighbors(0):
    print(i)

[6, 6, 7, 5, 8, 2, 6, 6, 6, 5, 8, 8, 5, 5, 6, 5, 7, 6, 6, 7]
1
4
14
16
18
19


In [6]:
# Dataset partitioning
def random_split(X, y, n, seed):
    """Equally split data between n agents"""
    rng = np.random.default_rng(seed)
    perm = rng.permutation(y.size)
    X_split = np.array_split(X[perm], n)  #np.stack to keep as a np array
    y_split = np.array_split(y[perm], n)
    return X_split, y_split





X_train = np.load('X_train.npy')
X_test = np.load('X_test.npy')
y_train = np.load('y_train.npy')
y_test = np.load('y_test.npy')


X, y = random_split(X_train, y_train, no_users, 1234)

In [7]:
datapoints = {}
count = 0
W1 = np.array([2.0, 2.0, 3.0, 3.0])
W2 = np.array([-2.0, 2.0, 3.0, -3.0])
W3 = 2 * W1
W4 = 2  * W2
W = [W1, W2]
m = 200
n = 4

scaler = [1.0, -1.0]

noise_sd = 0.001
for i, cluster_size in enumerate(cluster_sizes):
    for j in range(cluster_size):
        features = np.random.normal(loc=0.0, scale=1.0, size=(m, n))
        label = np.dot(features, W[i ]) + np.random.normal(0,noise_sd)
        data = X[count]
        data[:, 0:4] *= scaler[i]
        datapoints[count] = {
                'features': data,
                'degree': node_degree(count, G),
                'label': y[count],
                'neighbors': get_neighbors(count, G),
                'exact_weights': torch.from_numpy(W[i])
            }
        count += 1

In [8]:
class MyDataset(Dataset):
    def __init__(self, data, targets, transform=None):
        self.data = torch.FloatTensor(data)
        self.targets = torch.FloatTensor(targets).unsqueeze(-1)
        
    def __getitem__(self, index):
        x = self.data[index]
        y = self.targets[index]

        return x, y
    
    def __len__(self):
        return len(self.data)


In [9]:
class MLP_Net(nn.Module):
    def __init__(self, user_id):
        super(MLP_Net, self).__init__()
        self.fc1 = nn.Linear(9, 4, bias=False)
        self.fc2 = nn.Linear(4, 1, bias=False)
        #self.fc3 = nn.Linear(200, 10)
        self.user_id = user_id

    def forward(self, x):
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        output = self.fc2(x)
        #output = self.fc3(x)
        return output

In [10]:
from typing import Iterable, Optional

def grads_to_vector(parameters: Iterable[torch.Tensor]) -> torch.Tensor:
    r"""Convert parameters to one vector

    Args:
        parameters (Iterable[Tensor]): an iterator of Tensors that are the
            parameters of a model.

    Returns:
        The parameters represented by a single vector
    """
    # Flag for the device where the parameter is located
    param_device = None

    vec = []
    for param in parameters:
        # Ensure the parameters are located in the same device
        param_device = param.grad

        vec.append(param_device.view(-1))
    return torch.cat(vec)

In [11]:
model = MLP_Net(user_id=0)

lr = 0.01

dataloader = DataLoader(MyDataset(datapoints[19]["features"], datapoints[19]["label"]), batch_size=50, shuffle=False)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
for i in range(100):
    for (x, y) in dataloader:
        criterion = nn.MSELoss()
        optimizer.zero_grad()
        yhat = model(x)
        print(y.size())
        print(yhat.size())
        loss = criterion(yhat, y)
        
        loss.backward()
        print(i, loss, grads_to_vector(model.parameters()), parameters_to_vector(model.parameters()))
        #optimizer.step()
        #new_model = parameters_to_vector(model.parameters()) - lr * grads_to_vector(model.parameters())
        #vector_to_parameters(parameters=model.parameters(), vec=new_model)
        #if i % 50 ==0:
            #lr *= 0.9
            

#parameters_to_vector(model.parameters())

torch.Size([50, 1])
torch.Size([50, 1])
0 tensor(48.1133, grad_fn=<MseLossBackward0>) tensor([-0.1712,  0.1184,  0.0315,  0.1727, -0.1456, -0.1224, -0.1589, -0.0821,
        -0.2563, -0.3594,  0.4645, -0.7706,  0.5060, -0.5884, -0.5278, -0.5631,
         0.1299,  0.1909,  0.8116, -0.5697, -0.4041, -0.5035,  0.3225,  0.1931,
         0.4018,  0.3837,  1.4779,  0.1164, -0.1162, -0.2501,  0.3692, -0.3315,
        -0.2943, -0.3497, -0.2767, -0.0494, -5.3704, -2.2038, -3.2683, -2.8022]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0475,  0.2595, -0.2087,  0.1037],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
0 tensor(53.3068, grad_fn=<MseLossBackwar

3 tensor(52.0993, grad_fn=<MseLossBackward0>) tensor([-2.0076e-01,  1.0459e-01, -6.2969e-04,  2.6472e-01, -2.5556e-01,
        -1.8682e-01, -2.6294e-01, -9.4497e-02, -1.0981e-01, -1.1964e+00,
         9.6917e-01, -7.7178e-01,  7.0624e-01, -8.1397e-01, -1.6968e-01,
        -7.2543e-01, -6.4447e-02,  2.4457e-01,  5.7671e-01, -2.7516e-01,
        -1.0725e-01, -1.1363e+00,  9.7476e-01,  5.7101e-01,  9.5354e-01,
         5.0487e-01,  8.7981e-01, -1.7787e-01,  4.9626e-02, -5.1159e-01,
         6.5561e-01, -4.5915e-01, -2.3576e-01, -4.5066e-01, -5.7723e-01,
        -4.4347e-02, -6.4704e+00, -3.1280e+00, -3.0150e+00, -4.2302e+00]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.04

        -0.1872, -0.2618, -0.7427,  0.0348, -4.9809, -1.9301, -2.9307, -3.8241]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0475,  0.2595, -0.2087,  0.1037],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
5 tensor(56.9826, grad_fn=<MseLossBackward0>) tensor([-0.2296,  0.1556,  0.2467,  0.1827, -0.1728, -0.1181, -0.2050, -0.1072,
        -0.1282, -0.8052,  0.6589,  0.1511,  0.6946, -0.8982, -0.7385, -0.9998,
        -0.1878,  0.0857,  0.8678, -0.5003, -1.0721, -0.5816,  0.5989,  0.0631,
         0.7116,  0.6026,  1.0276,  0.0905, -0.1734, -0.3006,  0.4082, -0.2120,
        -0.2488, -0.2911, -0.6767,  0.1957, -6.8923, -1.6509, -3.9710, -3.2423]) 

8 tensor(72.3950, grad_fn=<MseLossBackward0>) tensor([-2.5430e-01,  1.8072e-01,  1.2652e-01,  3.2804e-01, -2.4440e-01,
        -1.3928e-01, -3.0307e-01, -2.7703e-01, -1.5976e-01, -8.8077e-01,
         6.6133e-01, -2.2503e-01,  3.9601e-01, -6.7702e-01, -4.1408e-01,
        -8.6414e-01,  7.2213e-03, -2.1133e-01,  8.9880e-01, -5.7593e-01,
        -4.9425e-01, -1.3916e+00,  9.4598e-01,  4.9105e-01,  1.2200e+00,
         1.4380e+00,  1.0311e+00, -2.5709e-01,  1.4907e-01, -3.5525e-01,
         6.1660e-01, -3.0981e-01, -1.9732e-01, -4.1752e-01, -9.3903e-01,
        -7.8445e-02, -8.2894e+00, -2.0293e+00, -4.4581e+00, -3.6872e+00]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.04

torch.Size([50, 1])
torch.Size([50, 1])
10 tensor(56.9826, grad_fn=<MseLossBackward0>) tensor([-0.2296,  0.1556,  0.2467,  0.1827, -0.1728, -0.1181, -0.2050, -0.1072,
        -0.1282, -0.8052,  0.6589,  0.1511,  0.6946, -0.8982, -0.7385, -0.9998,
        -0.1878,  0.0857,  0.8678, -0.5003, -1.0721, -0.5816,  0.5989,  0.0631,
         0.7116,  0.6026,  1.0276,  0.0905, -0.1734, -0.3006,  0.4082, -0.2120,
        -0.2488, -0.2911, -0.6767,  0.1957, -6.8923, -1.6509, -3.9710, -3.2423]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0475,  0.2595, -0.2087,  0.1037],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
10 tensor(54.3241, grad_fn=<MseLossBackw

13 tensor(53.3068, grad_fn=<MseLossBackward0>) tensor([-0.2131,  0.0982,  0.1612,  0.1306, -0.0974, -0.0320, -0.1200, -0.1327,
        -0.2159, -0.9343,  0.7183, -0.3845,  0.3586, -0.5278, -0.1981, -0.4707,
         0.0325,  0.0971,  0.8191, -0.4291, -1.1948, -0.0643,  0.0382, -0.3546,
         0.1180,  0.0091,  1.3440, -0.0102, -0.1002, -0.2055,  0.4259, -0.2943,
        -0.2618, -0.3508, -0.4526,  0.0705, -5.1247, -2.1413, -3.4694, -2.7520]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0475,  0.2595, -0.2087,  0.1037],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
13 tensor(52.0993, grad_fn=<MseLossBackward0>) tensor([-2.0076e-01,  1.0459e-01,

        -0.7694, -0.6062, -0.1899,  0.3722, -4.9449, -3.3032, -2.2195, -4.1252]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0475,  0.2595, -0.2087,  0.1037],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
16 tensor(48.1133, grad_fn=<MseLossBackward0>) tensor([-0.1712,  0.1184,  0.0315,  0.1727, -0.1456, -0.1224, -0.1589, -0.0821,
        -0.2563, -0.3594,  0.4645, -0.7706,  0.5060, -0.5884, -0.5278, -0.5631,
         0.1299,  0.1909,  0.8116, -0.5697, -0.4041, -0.5035,  0.3225,  0.1931,
         0.4018,  0.3837,  1.4779,  0.1164, -0.1162, -0.2501,  0.3692, -0.3315,
        -0.2943, -0.3497, -0.2767, -0.0494, -5.3704, -2.2038, -3.2683, -2.8022])

18 tensor(54.3241, grad_fn=<MseLossBackward0>) tensor([-0.1149,  0.0523,  0.0115,  0.2865, -0.2485, -0.2071, -0.2594, -0.1766,
        -0.2963, -0.0209,  0.0935, -0.5777,  0.6292, -0.8115, -0.7126, -0.8700,
         0.1842, -0.0299,  0.7067, -0.4106, -0.1510, -0.8318,  0.7789,  0.3979,
         0.8232,  0.6334,  1.7606,  0.2947, -0.3412, -0.3826,  0.5546, -0.4995,
        -0.4538, -0.5341, -0.3883, -0.2730, -7.0829, -1.6964, -4.0649, -4.4014]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0475,  0.2595, -0.2087,  0.1037],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
18 tensor(45.8731, grad_fn=<MseLossBackward0>) tensor([-0.0237,  0.0147,  0.0353

21 tensor(53.6040, grad_fn=<MseLossBackward0>) tensor([-0.0851,  0.0385,  0.0722,  0.2075, -0.1525, -0.1163, -0.1715, -0.1716,
        -0.1672, -0.2695,  0.3893, -0.9661,  0.5759, -0.4012, -0.2815, -0.3967,
        -0.1076,  0.3917,  0.5674, -0.4016, -0.4106, -0.4963,  0.2445, -0.0693,
         0.2442,  0.7905,  1.2199,  0.2028, -0.2496, -0.3884,  0.4438, -0.2437,
        -0.1872, -0.2618, -0.7427,  0.0348, -4.9809, -1.9301, -2.9307, -3.8241]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0475,  0.2595, -0.2087,  0.1037],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
21 tensor(56.9826, grad_fn=<MseLossBackward0>) tensor([-0.2296,  0.1556,  0.2467

torch.Size([50, 1])
torch.Size([50, 1])
24 tensor(47.8869, grad_fn=<MseLossBackward0>) tensor([-0.1568,  0.0857,  0.1464,  0.1529, -0.2005, -0.1328, -0.2207, -0.0095,
        -0.1709, -0.3195,  0.3070, -0.5764,  0.1881, -0.4178, -0.5028, -0.4558,
         0.3959, -0.0962,  0.5340, -0.2017, -0.6711, -0.5169,  0.6455,  0.1405,
         0.6453,  0.1853,  1.4098,  0.1029, -0.1576, -0.3131,  0.3097, -0.3123,
        -0.2400, -0.3093, -0.1061, -0.1661, -6.0437, -1.8537, -3.6758, -2.5305]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0475,  0.2595, -0.2087,  0.1037],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
24 tensor(53.6040, grad_fn=<MseLossBackw

torch.Size([50, 1])
torch.Size([50, 1])
27 tensor(53.3068, grad_fn=<MseLossBackward0>) tensor([-0.2131,  0.0982,  0.1612,  0.1306, -0.0974, -0.0320, -0.1200, -0.1327,
        -0.2159, -0.9343,  0.7183, -0.3845,  0.3586, -0.5278, -0.1981, -0.4707,
         0.0325,  0.0971,  0.8191, -0.4291, -1.1948, -0.0643,  0.0382, -0.3546,
         0.1180,  0.0091,  1.3440, -0.0102, -0.1002, -0.2055,  0.4259, -0.2943,
        -0.2618, -0.3508, -0.4526,  0.0705, -5.1247, -2.1413, -3.4694, -2.7520]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0475,  0.2595, -0.2087,  0.1037],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
27 tensor(52.0993, grad_fn=<MseLossBackw

30 tensor(48.1133, grad_fn=<MseLossBackward0>) tensor([-0.1712,  0.1184,  0.0315,  0.1727, -0.1456, -0.1224, -0.1589, -0.0821,
        -0.2563, -0.3594,  0.4645, -0.7706,  0.5060, -0.5884, -0.5278, -0.5631,
         0.1299,  0.1909,  0.8116, -0.5697, -0.4041, -0.5035,  0.3225,  0.1931,
         0.4018,  0.3837,  1.4779,  0.1164, -0.1162, -0.2501,  0.3692, -0.3315,
        -0.2943, -0.3497, -0.2767, -0.0494, -5.3704, -2.2038, -3.2683, -2.8022]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0475,  0.2595, -0.2087,  0.1037],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
30 tensor(53.3068, grad_fn=<MseLossBackward0>) tensor([-0.2131,  0.0982,  0.1612

32 tensor(44.9354, grad_fn=<MseLossBackward0>) tensor([-0.0786,  0.0442,  0.0254,  0.2368, -0.2325, -0.1483, -0.2400, -0.0375,
        -0.1398, -1.0750,  0.9824, -0.6112,  0.6428, -0.6682, -0.2831, -0.6963,
        -0.1282,  0.0679,  0.4449, -0.3694, -0.2023, -0.6868,  0.8514,  0.2821,
         0.8533, -0.2469,  0.7725,  0.0724, -0.0906, -0.3808,  0.5113, -0.3963,
        -0.3242, -0.4304, -0.3080, -0.0274, -5.4515, -2.5752, -2.8122, -3.5703]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0475,  0.2595, -0.2087,  0.1037],
       grad_fn=<CatBackward0>)
torch.Size([4, 1])
torch.Size([4, 1])
32 tensor(38.5651, grad_fn=<MseLossBackward0>) tensor([-0.1166,  0.1437, -0.1759, 

35 tensor(44.9354, grad_fn=<MseLossBackward0>) tensor([-0.0786,  0.0442,  0.0254,  0.2368, -0.2325, -0.1483, -0.2400, -0.0375,
        -0.1398, -1.0750,  0.9824, -0.6112,  0.6428, -0.6682, -0.2831, -0.6963,
        -0.1282,  0.0679,  0.4449, -0.3694, -0.2023, -0.6868,  0.8514,  0.2821,
         0.8533, -0.2469,  0.7725,  0.0724, -0.0906, -0.3808,  0.5113, -0.3963,
        -0.3242, -0.4304, -0.3080, -0.0274, -5.4515, -2.5752, -2.8122, -3.5703]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0475,  0.2595, -0.2087,  0.1037],
       grad_fn=<CatBackward0>)
torch.Size([4, 1])
torch.Size([4, 1])
35 tensor(38.5651, grad_fn=<MseLossBackward0>) tensor([-0.1166,  0.1437, -0.1759, 

torch.Size([50, 1])
torch.Size([50, 1])
38 tensor(56.9826, grad_fn=<MseLossBackward0>) tensor([-0.2296,  0.1556,  0.2467,  0.1827, -0.1728, -0.1181, -0.2050, -0.1072,
        -0.1282, -0.8052,  0.6589,  0.1511,  0.6946, -0.8982, -0.7385, -0.9998,
        -0.1878,  0.0857,  0.8678, -0.5003, -1.0721, -0.5816,  0.5989,  0.0631,
         0.7116,  0.6026,  1.0276,  0.0905, -0.1734, -0.3006,  0.4082, -0.2120,
        -0.2488, -0.2911, -0.6767,  0.1957, -6.8923, -1.6509, -3.9710, -3.2423]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0475,  0.2595, -0.2087,  0.1037],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
38 tensor(54.3241, grad_fn=<MseLossBackw

40 tensor(52.0993, grad_fn=<MseLossBackward0>) tensor([-2.0076e-01,  1.0459e-01, -6.2969e-04,  2.6472e-01, -2.5556e-01,
        -1.8682e-01, -2.6294e-01, -9.4497e-02, -1.0981e-01, -1.1964e+00,
         9.6917e-01, -7.7178e-01,  7.0624e-01, -8.1397e-01, -1.6968e-01,
        -7.2543e-01, -6.4447e-02,  2.4457e-01,  5.7671e-01, -2.7516e-01,
        -1.0725e-01, -1.1363e+00,  9.7476e-01,  5.7101e-01,  9.5354e-01,
         5.0487e-01,  8.7981e-01, -1.7787e-01,  4.9626e-02, -5.1159e-01,
         6.5561e-01, -4.5915e-01, -2.3576e-01, -4.5066e-01, -5.7723e-01,
        -4.4347e-02, -6.4704e+00, -3.1280e+00, -3.0150e+00, -4.2302e+00]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0

41 tensor(44.9354, grad_fn=<MseLossBackward0>) tensor([-0.0786,  0.0442,  0.0254,  0.2368, -0.2325, -0.1483, -0.2400, -0.0375,
        -0.1398, -1.0750,  0.9824, -0.6112,  0.6428, -0.6682, -0.2831, -0.6963,
        -0.1282,  0.0679,  0.4449, -0.3694, -0.2023, -0.6868,  0.8514,  0.2821,
         0.8533, -0.2469,  0.7725,  0.0724, -0.0906, -0.3808,  0.5113, -0.3963,
        -0.3242, -0.4304, -0.3080, -0.0274, -5.4515, -2.5752, -2.8122, -3.5703]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0475,  0.2595, -0.2087,  0.1037],
       grad_fn=<CatBackward0>)
torch.Size([4, 1])
torch.Size([4, 1])
41 tensor(38.5651, grad_fn=<MseLossBackward0>) tensor([-0.1166,  0.1437, -0.1759, 

torch.Size([50, 1])
torch.Size([50, 1])
43 tensor(52.0993, grad_fn=<MseLossBackward0>) tensor([-2.0076e-01,  1.0459e-01, -6.2969e-04,  2.6472e-01, -2.5556e-01,
        -1.8682e-01, -2.6294e-01, -9.4497e-02, -1.0981e-01, -1.1964e+00,
         9.6917e-01, -7.7178e-01,  7.0624e-01, -8.1397e-01, -1.6968e-01,
        -7.2543e-01, -6.4447e-02,  2.4457e-01,  5.7671e-01, -2.7516e-01,
        -1.0725e-01, -1.1363e+00,  9.7476e-01,  5.7101e-01,  9.5354e-01,
         5.0487e-01,  8.7981e-01, -1.7787e-01,  4.9626e-02, -5.1159e-01,
         6.5561e-01, -4.5915e-01, -2.3576e-01, -4.5066e-01, -5.7723e-01,
        -4.4347e-02, -6.4704e+00, -3.1280e+00, -3.0150e+00, -4.2302e+00]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
        

torch.Size([50, 1])
torch.Size([50, 1])
44 tensor(72.3950, grad_fn=<MseLossBackward0>) tensor([-2.5430e-01,  1.8072e-01,  1.2652e-01,  3.2804e-01, -2.4440e-01,
        -1.3928e-01, -3.0307e-01, -2.7703e-01, -1.5976e-01, -8.8077e-01,
         6.6133e-01, -2.2503e-01,  3.9601e-01, -6.7702e-01, -4.1408e-01,
        -8.6414e-01,  7.2213e-03, -2.1133e-01,  8.9880e-01, -5.7593e-01,
        -4.9425e-01, -1.3916e+00,  9.4598e-01,  4.9105e-01,  1.2200e+00,
         1.4380e+00,  1.0311e+00, -2.5709e-01,  1.4907e-01, -3.5525e-01,
         6.1660e-01, -3.0981e-01, -1.9732e-01, -4.1752e-01, -9.3903e-01,
        -7.8445e-02, -8.2894e+00, -2.0293e+00, -4.4581e+00, -3.6872e+00]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
        

45 tensor(49.8704, grad_fn=<MseLossBackward0>) tensor([-0.1427,  0.1122,  0.0664,  0.1705, -0.1715, -0.1133, -0.1732, -0.0482,
        -0.1107, -0.2952,  0.4130, -0.3310,  1.0071, -1.1160, -0.6771, -1.0591,
         0.0706,  0.4049,  0.5746, -0.3155, -0.3438, -0.2799,  0.2964, -0.3677,
         0.1790,  0.5955,  1.6424,  0.2445, -0.2234, -0.2931,  0.3959, -0.4678,
        -0.2697, -0.4049, -0.1906, -0.1298, -4.9053, -2.2044, -3.4560, -3.2506]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0475,  0.2595, -0.2087,  0.1037],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
45 tensor(47.8869, grad_fn=<MseLossBackward0>) tensor([-0.1568,  0.0857,  0.1464

        -0.1872, -0.2618, -0.7427,  0.0348, -4.9809, -1.9301, -2.9307, -3.8241]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0475,  0.2595, -0.2087,  0.1037],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
46 tensor(56.9826, grad_fn=<MseLossBackward0>) tensor([-0.2296,  0.1556,  0.2467,  0.1827, -0.1728, -0.1181, -0.2050, -0.1072,
        -0.1282, -0.8052,  0.6589,  0.1511,  0.6946, -0.8982, -0.7385, -0.9998,
        -0.1878,  0.0857,  0.8678, -0.5003, -1.0721, -0.5816,  0.5989,  0.0631,
         0.7116,  0.6026,  1.0276,  0.0905, -0.1734, -0.3006,  0.4082, -0.2120,
        -0.2488, -0.2911, -0.6767,  0.1957, -6.8923, -1.6509, -3.9710, -3.2423])

47 tensor(44.9354, grad_fn=<MseLossBackward0>) tensor([-0.0786,  0.0442,  0.0254,  0.2368, -0.2325, -0.1483, -0.2400, -0.0375,
        -0.1398, -1.0750,  0.9824, -0.6112,  0.6428, -0.6682, -0.2831, -0.6963,
        -0.1282,  0.0679,  0.4449, -0.3694, -0.2023, -0.6868,  0.8514,  0.2821,
         0.8533, -0.2469,  0.7725,  0.0724, -0.0906, -0.3808,  0.5113, -0.3963,
        -0.3242, -0.4304, -0.3080, -0.0274, -5.4515, -2.5752, -2.8122, -3.5703]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0475,  0.2595, -0.2087,  0.1037],
       grad_fn=<CatBackward0>)
torch.Size([4, 1])
torch.Size([4, 1])
47 tensor(38.5651, grad_fn=<MseLossBackward0>) tensor([-0.1166,  0.1437, -0.1759, 

49 tensor(53.3068, grad_fn=<MseLossBackward0>) tensor([-0.2131,  0.0982,  0.1612,  0.1306, -0.0974, -0.0320, -0.1200, -0.1327,
        -0.2159, -0.9343,  0.7183, -0.3845,  0.3586, -0.5278, -0.1981, -0.4707,
         0.0325,  0.0971,  0.8191, -0.4291, -1.1948, -0.0643,  0.0382, -0.3546,
         0.1180,  0.0091,  1.3440, -0.0102, -0.1002, -0.2055,  0.4259, -0.2943,
        -0.2618, -0.3508, -0.4526,  0.0705, -5.1247, -2.1413, -3.4694, -2.7520]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0475,  0.2595, -0.2087,  0.1037],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
49 tensor(52.0993, grad_fn=<MseLossBackward0>) tensor([-2.0076e-01,  1.0459e-01,

torch.Size([50, 1])
torch.Size([50, 1])
50 tensor(58.4502, grad_fn=<MseLossBackward0>) tensor([ 0.0749, -0.1441,  0.0740,  0.3077, -0.3755, -0.3342, -0.3779, -0.0855,
        -0.1255, -0.2587,  0.1164,  0.0287,  0.3731, -1.1589, -0.6936, -1.1502,
         0.6204, -0.0090,  0.0180,  0.3190, -0.0972, -1.0130,  1.3620,  0.9513,
         1.3365,  0.1911,  1.1794,  0.2546, -0.4212, -0.2019,  0.6356, -0.6158,
        -0.4548, -0.5794, -0.5980, -0.2751, -7.2799, -2.3895, -3.2513, -4.5577]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0475,  0.2595, -0.2087,  0.1037],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
50 tensor(49.8704, grad_fn=<MseLossBackw

51 tensor(54.3241, grad_fn=<MseLossBackward0>) tensor([-0.1149,  0.0523,  0.0115,  0.2865, -0.2485, -0.2071, -0.2594, -0.1766,
        -0.2963, -0.0209,  0.0935, -0.5777,  0.6292, -0.8115, -0.7126, -0.8700,
         0.1842, -0.0299,  0.7067, -0.4106, -0.1510, -0.8318,  0.7789,  0.3979,
         0.8232,  0.6334,  1.7606,  0.2947, -0.3412, -0.3826,  0.5546, -0.4995,
        -0.4538, -0.5341, -0.3883, -0.2730, -7.0829, -1.6964, -4.0649, -4.4014]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0475,  0.2595, -0.2087,  0.1037],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
51 tensor(45.8731, grad_fn=<MseLossBackward0>) tensor([-0.0237,  0.0147,  0.0353

53 tensor(72.3950, grad_fn=<MseLossBackward0>) tensor([-2.5430e-01,  1.8072e-01,  1.2652e-01,  3.2804e-01, -2.4440e-01,
        -1.3928e-01, -3.0307e-01, -2.7703e-01, -1.5976e-01, -8.8077e-01,
         6.6133e-01, -2.2503e-01,  3.9601e-01, -6.7702e-01, -4.1408e-01,
        -8.6414e-01,  7.2213e-03, -2.1133e-01,  8.9880e-01, -5.7593e-01,
        -4.9425e-01, -1.3916e+00,  9.4598e-01,  4.9105e-01,  1.2200e+00,
         1.4380e+00,  1.0311e+00, -2.5709e-01,  1.4907e-01, -3.5525e-01,
         6.1660e-01, -3.0981e-01, -1.9732e-01, -4.1752e-01, -9.3903e-01,
        -7.8445e-02, -8.2894e+00, -2.0293e+00, -4.4581e+00, -3.6872e+00]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0

        -0.2191, -0.3556, -0.3467,  0.0821, -3.8840, -2.7660, -1.8236, -3.7477]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0475,  0.2595, -0.2087,  0.1037],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
54 tensor(44.9354, grad_fn=<MseLossBackward0>) tensor([-0.0786,  0.0442,  0.0254,  0.2368, -0.2325, -0.1483, -0.2400, -0.0375,
        -0.1398, -1.0750,  0.9824, -0.6112,  0.6428, -0.6682, -0.2831, -0.6963,
        -0.1282,  0.0679,  0.4449, -0.3694, -0.2023, -0.6868,  0.8514,  0.2821,
         0.8533, -0.2469,  0.7725,  0.0724, -0.0906, -0.3808,  0.5113, -0.3963,
        -0.3242, -0.4304, -0.3080, -0.0274, -5.4515, -2.5752, -2.8122, -3.5703])

torch.Size([50, 1])
torch.Size([50, 1])
56 tensor(53.3068, grad_fn=<MseLossBackward0>) tensor([-0.2131,  0.0982,  0.1612,  0.1306, -0.0974, -0.0320, -0.1200, -0.1327,
        -0.2159, -0.9343,  0.7183, -0.3845,  0.3586, -0.5278, -0.1981, -0.4707,
         0.0325,  0.0971,  0.8191, -0.4291, -1.1948, -0.0643,  0.0382, -0.3546,
         0.1180,  0.0091,  1.3440, -0.0102, -0.1002, -0.2055,  0.4259, -0.2943,
        -0.2618, -0.3508, -0.4526,  0.0705, -5.1247, -2.1413, -3.4694, -2.7520]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0475,  0.2595, -0.2087,  0.1037],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
56 tensor(52.0993, grad_fn=<MseLossBackw

57 tensor(58.4502, grad_fn=<MseLossBackward0>) tensor([ 0.0749, -0.1441,  0.0740,  0.3077, -0.3755, -0.3342, -0.3779, -0.0855,
        -0.1255, -0.2587,  0.1164,  0.0287,  0.3731, -1.1589, -0.6936, -1.1502,
         0.6204, -0.0090,  0.0180,  0.3190, -0.0972, -1.0130,  1.3620,  0.9513,
         1.3365,  0.1911,  1.1794,  0.2546, -0.4212, -0.2019,  0.6356, -0.6158,
        -0.4548, -0.5794, -0.5980, -0.2751, -7.2799, -2.3895, -3.2513, -4.5577]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0475,  0.2595, -0.2087,  0.1037],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
57 tensor(49.8704, grad_fn=<MseLossBackward0>) tensor([-0.1427,  0.1122,  0.0664

torch.Size([50, 1])
58 tensor(56.9826, grad_fn=<MseLossBackward0>) tensor([-0.2296,  0.1556,  0.2467,  0.1827, -0.1728, -0.1181, -0.2050, -0.1072,
        -0.1282, -0.8052,  0.6589,  0.1511,  0.6946, -0.8982, -0.7385, -0.9998,
        -0.1878,  0.0857,  0.8678, -0.5003, -1.0721, -0.5816,  0.5989,  0.0631,
         0.7116,  0.6026,  1.0276,  0.0905, -0.1734, -0.3006,  0.4082, -0.2120,
        -0.2488, -0.2911, -0.6767,  0.1957, -6.8923, -1.6509, -3.9710, -3.2423]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0475,  0.2595, -0.2087,  0.1037],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
58 tensor(54.3241, grad_fn=<MseLossBackward0>) tensor([-0.11

torch.Size([50, 1])
torch.Size([50, 1])
60 tensor(53.3068, grad_fn=<MseLossBackward0>) tensor([-0.2131,  0.0982,  0.1612,  0.1306, -0.0974, -0.0320, -0.1200, -0.1327,
        -0.2159, -0.9343,  0.7183, -0.3845,  0.3586, -0.5278, -0.1981, -0.4707,
         0.0325,  0.0971,  0.8191, -0.4291, -1.1948, -0.0643,  0.0382, -0.3546,
         0.1180,  0.0091,  1.3440, -0.0102, -0.1002, -0.2055,  0.4259, -0.2943,
        -0.2618, -0.3508, -0.4526,  0.0705, -5.1247, -2.1413, -3.4694, -2.7520]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0475,  0.2595, -0.2087,  0.1037],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
60 tensor(52.0993, grad_fn=<MseLossBackw

61 tensor(44.0211, grad_fn=<MseLossBackward0>) tensor([-0.1367,  0.0807,  0.0757,  0.1455, -0.1770, -0.1075, -0.1819, -0.0475,
        -0.2035, -0.2358,  0.2552, -0.5164,  0.3647, -0.7310, -0.6183, -0.6984,
         0.4161,  0.4059,  0.5158, -0.3300, -0.3847, -0.4691,  0.5870,  0.1786,
         0.5838,  0.2327,  1.1343,  0.2115, -0.2981, -0.3981,  0.4229, -0.4676,
        -0.4116, -0.4633, -0.2379, -0.0459, -5.2156, -2.5008, -3.0158, -3.7504]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0475,  0.2595, -0.2087,  0.1037],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
61 tensor(72.3950, grad_fn=<MseLossBackward0>) tensor([-2.5430e-01,  1.8072e-01,

        -0.2400, -0.3093, -0.1061, -0.1661, -6.0437, -1.8537, -3.6758, -2.5305]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0475,  0.2595, -0.2087,  0.1037],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
62 tensor(53.6040, grad_fn=<MseLossBackward0>) tensor([-0.0851,  0.0385,  0.0722,  0.2075, -0.1525, -0.1163, -0.1715, -0.1716,
        -0.1672, -0.2695,  0.3893, -0.9661,  0.5759, -0.4012, -0.2815, -0.3967,
        -0.1076,  0.3917,  0.5674, -0.4016, -0.4106, -0.4963,  0.2445, -0.0693,
         0.2442,  0.7905,  1.2199,  0.2028, -0.2496, -0.3884,  0.4438, -0.2437,
        -0.1872, -0.2618, -0.7427,  0.0348, -4.9809, -1.9301, -2.9307, -3.8241])

64 tensor(48.1133, grad_fn=<MseLossBackward0>) tensor([-0.1712,  0.1184,  0.0315,  0.1727, -0.1456, -0.1224, -0.1589, -0.0821,
        -0.2563, -0.3594,  0.4645, -0.7706,  0.5060, -0.5884, -0.5278, -0.5631,
         0.1299,  0.1909,  0.8116, -0.5697, -0.4041, -0.5035,  0.3225,  0.1931,
         0.4018,  0.3837,  1.4779,  0.1164, -0.1162, -0.2501,  0.3692, -0.3315,
        -0.2943, -0.3497, -0.2767, -0.0494, -5.3704, -2.2038, -3.2683, -2.8022]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0475,  0.2595, -0.2087,  0.1037],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
64 tensor(53.3068, grad_fn=<MseLossBackward0>) tensor([-0.2131,  0.0982,  0.1612

65 tensor(72.3950, grad_fn=<MseLossBackward0>) tensor([-2.5430e-01,  1.8072e-01,  1.2652e-01,  3.2804e-01, -2.4440e-01,
        -1.3928e-01, -3.0307e-01, -2.7703e-01, -1.5976e-01, -8.8077e-01,
         6.6133e-01, -2.2503e-01,  3.9601e-01, -6.7702e-01, -4.1408e-01,
        -8.6414e-01,  7.2213e-03, -2.1133e-01,  8.9880e-01, -5.7593e-01,
        -4.9425e-01, -1.3916e+00,  9.4598e-01,  4.9105e-01,  1.2200e+00,
         1.4380e+00,  1.0311e+00, -2.5709e-01,  1.4907e-01, -3.5525e-01,
         6.1660e-01, -3.0981e-01, -1.9732e-01, -4.1752e-01, -9.3903e-01,
        -7.8445e-02, -8.2894e+00, -2.0293e+00, -4.4581e+00, -3.6872e+00]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0

67 tensor(52.0993, grad_fn=<MseLossBackward0>) tensor([-2.0076e-01,  1.0459e-01, -6.2969e-04,  2.6472e-01, -2.5556e-01,
        -1.8682e-01, -2.6294e-01, -9.4497e-02, -1.0981e-01, -1.1964e+00,
         9.6917e-01, -7.7178e-01,  7.0624e-01, -8.1397e-01, -1.6968e-01,
        -7.2543e-01, -6.4447e-02,  2.4457e-01,  5.7671e-01, -2.7516e-01,
        -1.0725e-01, -1.1363e+00,  9.7476e-01,  5.7101e-01,  9.5354e-01,
         5.0487e-01,  8.7981e-01, -1.7787e-01,  4.9626e-02, -5.1159e-01,
         6.5561e-01, -4.5915e-01, -2.3576e-01, -4.5066e-01, -5.7723e-01,
        -4.4347e-02, -6.4704e+00, -3.1280e+00, -3.0150e+00, -4.2302e+00]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0

68 tensor(56.9826, grad_fn=<MseLossBackward0>) tensor([-0.2296,  0.1556,  0.2467,  0.1827, -0.1728, -0.1181, -0.2050, -0.1072,
        -0.1282, -0.8052,  0.6589,  0.1511,  0.6946, -0.8982, -0.7385, -0.9998,
        -0.1878,  0.0857,  0.8678, -0.5003, -1.0721, -0.5816,  0.5989,  0.0631,
         0.7116,  0.6026,  1.0276,  0.0905, -0.1734, -0.3006,  0.4082, -0.2120,
        -0.2488, -0.2911, -0.6767,  0.1957, -6.8923, -1.6509, -3.9710, -3.2423]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0475,  0.2595, -0.2087,  0.1037],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
68 tensor(54.3241, grad_fn=<MseLossBackward0>) tensor([-0.1149,  0.0523,  0.0115

torch.Size([50, 1])
torch.Size([50, 1])
70 tensor(44.0211, grad_fn=<MseLossBackward0>) tensor([-0.1367,  0.0807,  0.0757,  0.1455, -0.1770, -0.1075, -0.1819, -0.0475,
        -0.2035, -0.2358,  0.2552, -0.5164,  0.3647, -0.7310, -0.6183, -0.6984,
         0.4161,  0.4059,  0.5158, -0.3300, -0.3847, -0.4691,  0.5870,  0.1786,
         0.5838,  0.2327,  1.1343,  0.2115, -0.2981, -0.3981,  0.4229, -0.4676,
        -0.4116, -0.4633, -0.2379, -0.0459, -5.2156, -2.5008, -3.0158, -3.7504]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0475,  0.2595, -0.2087,  0.1037],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
70 tensor(72.3950, grad_fn=<MseLossBackw

torch.Size([50, 1])
torch.Size([50, 1])
71 tensor(54.3241, grad_fn=<MseLossBackward0>) tensor([-0.1149,  0.0523,  0.0115,  0.2865, -0.2485, -0.2071, -0.2594, -0.1766,
        -0.2963, -0.0209,  0.0935, -0.5777,  0.6292, -0.8115, -0.7126, -0.8700,
         0.1842, -0.0299,  0.7067, -0.4106, -0.1510, -0.8318,  0.7789,  0.3979,
         0.8232,  0.6334,  1.7606,  0.2947, -0.3412, -0.3826,  0.5546, -0.4995,
        -0.4538, -0.5341, -0.3883, -0.2730, -7.0829, -1.6964, -4.0649, -4.4014]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0475,  0.2595, -0.2087,  0.1037],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
71 tensor(45.8731, grad_fn=<MseLossBackw

73 tensor(52.0993, grad_fn=<MseLossBackward0>) tensor([-2.0076e-01,  1.0459e-01, -6.2969e-04,  2.6472e-01, -2.5556e-01,
        -1.8682e-01, -2.6294e-01, -9.4497e-02, -1.0981e-01, -1.1964e+00,
         9.6917e-01, -7.7178e-01,  7.0624e-01, -8.1397e-01, -1.6968e-01,
        -7.2543e-01, -6.4447e-02,  2.4457e-01,  5.7671e-01, -2.7516e-01,
        -1.0725e-01, -1.1363e+00,  9.7476e-01,  5.7101e-01,  9.5354e-01,
         5.0487e-01,  8.7981e-01, -1.7787e-01,  4.9626e-02, -5.1159e-01,
         6.5561e-01, -4.5915e-01, -2.3576e-01, -4.5066e-01, -5.7723e-01,
        -4.4347e-02, -6.4704e+00, -3.1280e+00, -3.0150e+00, -4.2302e+00]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0

        -0.2488, -0.2911, -0.6767,  0.1957, -6.8923, -1.6509, -3.9710, -3.2423]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0475,  0.2595, -0.2087,  0.1037],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
74 tensor(54.3241, grad_fn=<MseLossBackward0>) tensor([-0.1149,  0.0523,  0.0115,  0.2865, -0.2485, -0.2071, -0.2594, -0.1766,
        -0.2963, -0.0209,  0.0935, -0.5777,  0.6292, -0.8115, -0.7126, -0.8700,
         0.1842, -0.0299,  0.7067, -0.4106, -0.1510, -0.8318,  0.7789,  0.3979,
         0.8232,  0.6334,  1.7606,  0.2947, -0.3412, -0.3826,  0.5546, -0.4995,
        -0.4538, -0.5341, -0.3883, -0.2730, -7.0829, -1.6964, -4.0649, -4.4014])

76 tensor(52.0993, grad_fn=<MseLossBackward0>) tensor([-2.0076e-01,  1.0459e-01, -6.2969e-04,  2.6472e-01, -2.5556e-01,
        -1.8682e-01, -2.6294e-01, -9.4497e-02, -1.0981e-01, -1.1964e+00,
         9.6917e-01, -7.7178e-01,  7.0624e-01, -8.1397e-01, -1.6968e-01,
        -7.2543e-01, -6.4447e-02,  2.4457e-01,  5.7671e-01, -2.7516e-01,
        -1.0725e-01, -1.1363e+00,  9.7476e-01,  5.7101e-01,  9.5354e-01,
         5.0487e-01,  8.7981e-01, -1.7787e-01,  4.9626e-02, -5.1159e-01,
         6.5561e-01, -4.5915e-01, -2.3576e-01, -4.5066e-01, -5.7723e-01,
        -4.4347e-02, -6.4704e+00, -3.1280e+00, -3.0150e+00, -4.2302e+00]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0

77 tensor(56.9826, grad_fn=<MseLossBackward0>) tensor([-0.2296,  0.1556,  0.2467,  0.1827, -0.1728, -0.1181, -0.2050, -0.1072,
        -0.1282, -0.8052,  0.6589,  0.1511,  0.6946, -0.8982, -0.7385, -0.9998,
        -0.1878,  0.0857,  0.8678, -0.5003, -1.0721, -0.5816,  0.5989,  0.0631,
         0.7116,  0.6026,  1.0276,  0.0905, -0.1734, -0.3006,  0.4082, -0.2120,
        -0.2488, -0.2911, -0.6767,  0.1957, -6.8923, -1.6509, -3.9710, -3.2423]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0475,  0.2595, -0.2087,  0.1037],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
77 tensor(54.3241, grad_fn=<MseLossBackward0>) tensor([-0.1149,  0.0523,  0.0115

79 tensor(44.0211, grad_fn=<MseLossBackward0>) tensor([-0.1367,  0.0807,  0.0757,  0.1455, -0.1770, -0.1075, -0.1819, -0.0475,
        -0.2035, -0.2358,  0.2552, -0.5164,  0.3647, -0.7310, -0.6183, -0.6984,
         0.4161,  0.4059,  0.5158, -0.3300, -0.3847, -0.4691,  0.5870,  0.1786,
         0.5838,  0.2327,  1.1343,  0.2115, -0.2981, -0.3981,  0.4229, -0.4676,
        -0.4116, -0.4633, -0.2379, -0.0459, -5.2156, -2.5008, -3.0158, -3.7504]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0475,  0.2595, -0.2087,  0.1037],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
79 tensor(72.3950, grad_fn=<MseLossBackward0>) tensor([-2.5430e-01,  1.8072e-01,

torch.Size([50, 1])
torch.Size([50, 1])
80 tensor(45.8731, grad_fn=<MseLossBackward0>) tensor([-0.0237,  0.0147,  0.0353,  0.1938, -0.1491, -0.1778, -0.1900,  0.0129,
        -0.0184, -0.1864,  0.2878, -1.0085,  0.3045, -0.4278, -0.2467, -0.5935,
         0.2988,  0.4817,  0.2703, -0.1993, -0.0108, -0.6076,  0.4186,  0.3419,
         0.5890, -0.0375,  0.6442,  0.1190, -0.1468, -0.5692,  0.4571, -0.2927,
        -0.2191, -0.3556, -0.3467,  0.0821, -3.8840, -2.7660, -1.8236, -3.7477]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0475,  0.2595, -0.2087,  0.1037],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
80 tensor(44.9354, grad_fn=<MseLossBackw

        -6.5728e-02, -8.2186e+00, -2.4799e+00, -4.4364e+00, -3.9686e+00]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0475,  0.2595, -0.2087,  0.1037],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
82 tensor(44.0211, grad_fn=<MseLossBackward0>) tensor([-0.1367,  0.0807,  0.0757,  0.1455, -0.1770, -0.1075, -0.1819, -0.0475,
        -0.2035, -0.2358,  0.2552, -0.5164,  0.3647, -0.7310, -0.6183, -0.6984,
         0.4161,  0.4059,  0.5158, -0.3300, -0.3847, -0.4691,  0.5870,  0.1786,
         0.5838,  0.2327,  1.1343,  0.2115, -0.2981, -0.3981,  0.4229, -0.4676,
        -0.4116, -0.4633, -0.2379, -0.0459, -5.2156, -2.5008, -3.0158, -3.7504]) tensor

        -0.2400, -0.3093, -0.1061, -0.1661, -6.0437, -1.8537, -3.6758, -2.5305]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0475,  0.2595, -0.2087,  0.1037],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
83 tensor(53.6040, grad_fn=<MseLossBackward0>) tensor([-0.0851,  0.0385,  0.0722,  0.2075, -0.1525, -0.1163, -0.1715, -0.1716,
        -0.1672, -0.2695,  0.3893, -0.9661,  0.5759, -0.4012, -0.2815, -0.3967,
        -0.1076,  0.3917,  0.5674, -0.4016, -0.4106, -0.4963,  0.2445, -0.0693,
         0.2442,  0.7905,  1.2199,  0.2028, -0.2496, -0.3884,  0.4438, -0.2437,
        -0.1872, -0.2618, -0.7427,  0.0348, -4.9809, -1.9301, -2.9307, -3.8241])

84 tensor(38.5651, grad_fn=<MseLossBackward0>) tensor([-0.1166,  0.1437, -0.1759,  0.2998, -0.2669, -0.3524, -0.2777, -0.0870,
         0.1705, -0.6371,  0.7849, -0.9612,  1.6382, -1.4582, -1.9252, -1.5170,
        -0.4753,  0.9313, -0.7607,  0.7809,  0.1451, -0.7576,  0.7710,  0.1253,
         0.8755, -0.1063,  0.8679, -0.2546,  0.3136, -0.3841,  0.6546, -0.5827,
        -0.7694, -0.6062, -0.1899,  0.3722, -4.9449, -3.3032, -2.2195, -4.1252]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0475,  0.2595, -0.2087,  0.1037],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
85 tensor(48.1133, grad_fn=<MseLossBackward0>) tensor([-0.1712,  0.1184,  0.0315

86 tensor(46.9532, grad_fn=<MseLossBackward0>) tensor([-1.5483e-01,  1.0012e-01,  1.7421e-01,  2.6793e-01, -2.8708e-01,
        -1.8139e-01, -3.1492e-01, -8.9693e-02, -2.2301e-01, -5.8210e-01,
         5.8873e-01, -2.9506e-01,  6.0957e-01, -1.0777e+00, -3.5505e-01,
        -1.0627e+00,  3.7015e-01, -3.2819e-01,  8.1822e-01, -5.4609e-01,
        -6.7262e-01, -1.0227e+00,  1.0904e+00,  5.7656e-01,  1.2016e+00,
         4.3585e-01,  1.2384e+00,  1.9766e-03, -6.5702e-03, -2.8567e-01,
         6.4266e-01, -4.7746e-01, -4.7358e-01, -5.2764e-01, -4.2047e-01,
        -6.5728e-02, -8.2186e+00, -2.4799e+00, -4.4364e+00, -3.9686e+00]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0

87 tensor(49.8704, grad_fn=<MseLossBackward0>) tensor([-0.1427,  0.1122,  0.0664,  0.1705, -0.1715, -0.1133, -0.1732, -0.0482,
        -0.1107, -0.2952,  0.4130, -0.3310,  1.0071, -1.1160, -0.6771, -1.0591,
         0.0706,  0.4049,  0.5746, -0.3155, -0.3438, -0.2799,  0.2964, -0.3677,
         0.1790,  0.5955,  1.6424,  0.2445, -0.2234, -0.2931,  0.3959, -0.4678,
        -0.2697, -0.4049, -0.1906, -0.1298, -4.9053, -2.2044, -3.4560, -3.2506]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0475,  0.2595, -0.2087,  0.1037],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
87 tensor(47.8869, grad_fn=<MseLossBackward0>) tensor([-0.1568,  0.0857,  0.1464

88 tensor(38.5651, grad_fn=<MseLossBackward0>) tensor([-0.1166,  0.1437, -0.1759,  0.2998, -0.2669, -0.3524, -0.2777, -0.0870,
         0.1705, -0.6371,  0.7849, -0.9612,  1.6382, -1.4582, -1.9252, -1.5170,
        -0.4753,  0.9313, -0.7607,  0.7809,  0.1451, -0.7576,  0.7710,  0.1253,
         0.8755, -0.1063,  0.8679, -0.2546,  0.3136, -0.3841,  0.6546, -0.5827,
        -0.7694, -0.6062, -0.1899,  0.3722, -4.9449, -3.3032, -2.2195, -4.1252]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0475,  0.2595, -0.2087,  0.1037],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
89 tensor(48.1133, grad_fn=<MseLossBackward0>) tensor([-0.1712,  0.1184,  0.0315

90 tensor(58.4502, grad_fn=<MseLossBackward0>) tensor([ 0.0749, -0.1441,  0.0740,  0.3077, -0.3755, -0.3342, -0.3779, -0.0855,
        -0.1255, -0.2587,  0.1164,  0.0287,  0.3731, -1.1589, -0.6936, -1.1502,
         0.6204, -0.0090,  0.0180,  0.3190, -0.0972, -1.0130,  1.3620,  0.9513,
         1.3365,  0.1911,  1.1794,  0.2546, -0.4212, -0.2019,  0.6356, -0.6158,
        -0.4548, -0.5794, -0.5980, -0.2751, -7.2799, -2.3895, -3.2513, -4.5577]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0475,  0.2595, -0.2087,  0.1037],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
90 tensor(49.8704, grad_fn=<MseLossBackward0>) tensor([-0.1427,  0.1122,  0.0664

torch.Size([50, 1])
torch.Size([50, 1])
91 tensor(44.9354, grad_fn=<MseLossBackward0>) tensor([-0.0786,  0.0442,  0.0254,  0.2368, -0.2325, -0.1483, -0.2400, -0.0375,
        -0.1398, -1.0750,  0.9824, -0.6112,  0.6428, -0.6682, -0.2831, -0.6963,
        -0.1282,  0.0679,  0.4449, -0.3694, -0.2023, -0.6868,  0.8514,  0.2821,
         0.8533, -0.2469,  0.7725,  0.0724, -0.0906, -0.3808,  0.5113, -0.3963,
        -0.3242, -0.4304, -0.3080, -0.0274, -5.4515, -2.5752, -2.8122, -3.5703]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0475,  0.2595, -0.2087,  0.1037],
       grad_fn=<CatBackward0>)
torch.Size([4, 1])
torch.Size([4, 1])
91 tensor(38.5651, grad_fn=<MseLossBackwar

93 tensor(52.0993, grad_fn=<MseLossBackward0>) tensor([-2.0076e-01,  1.0459e-01, -6.2969e-04,  2.6472e-01, -2.5556e-01,
        -1.8682e-01, -2.6294e-01, -9.4497e-02, -1.0981e-01, -1.1964e+00,
         9.6917e-01, -7.7178e-01,  7.0624e-01, -8.1397e-01, -1.6968e-01,
        -7.2543e-01, -6.4447e-02,  2.4457e-01,  5.7671e-01, -2.7516e-01,
        -1.0725e-01, -1.1363e+00,  9.7476e-01,  5.7101e-01,  9.5354e-01,
         5.0487e-01,  8.7981e-01, -1.7787e-01,  4.9626e-02, -5.1159e-01,
         6.5561e-01, -4.5915e-01, -2.3576e-01, -4.5066e-01, -5.7723e-01,
        -4.4347e-02, -6.4704e+00, -3.1280e+00, -3.0150e+00, -4.2302e+00]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0

94 tensor(49.8704, grad_fn=<MseLossBackward0>) tensor([-0.1427,  0.1122,  0.0664,  0.1705, -0.1715, -0.1133, -0.1732, -0.0482,
        -0.1107, -0.2952,  0.4130, -0.3310,  1.0071, -1.1160, -0.6771, -1.0591,
         0.0706,  0.4049,  0.5746, -0.3155, -0.3438, -0.2799,  0.2964, -0.3677,
         0.1790,  0.5955,  1.6424,  0.2445, -0.2234, -0.2931,  0.3959, -0.4678,
        -0.2697, -0.4049, -0.1906, -0.1298, -4.9053, -2.2044, -3.4560, -3.2506]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0475,  0.2595, -0.2087,  0.1037],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
94 tensor(47.8869, grad_fn=<MseLossBackward0>) tensor([-0.1568,  0.0857,  0.1464

95 tensor(44.9354, grad_fn=<MseLossBackward0>) tensor([-0.0786,  0.0442,  0.0254,  0.2368, -0.2325, -0.1483, -0.2400, -0.0375,
        -0.1398, -1.0750,  0.9824, -0.6112,  0.6428, -0.6682, -0.2831, -0.6963,
        -0.1282,  0.0679,  0.4449, -0.3694, -0.2023, -0.6868,  0.8514,  0.2821,
         0.8533, -0.2469,  0.7725,  0.0724, -0.0906, -0.3808,  0.5113, -0.3963,
        -0.3242, -0.4304, -0.3080, -0.0274, -5.4515, -2.5752, -2.8122, -3.5703]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0475,  0.2595, -0.2087,  0.1037],
       grad_fn=<CatBackward0>)
torch.Size([4, 1])
torch.Size([4, 1])
95 tensor(38.5651, grad_fn=<MseLossBackward0>) tensor([-0.1166,  0.1437, -0.1759, 

        -0.2618, -0.3508, -0.4526,  0.0705, -5.1247, -2.1413, -3.4694, -2.7520]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0475,  0.2595, -0.2087,  0.1037],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
97 tensor(52.0993, grad_fn=<MseLossBackward0>) tensor([-2.0076e-01,  1.0459e-01, -6.2969e-04,  2.6472e-01, -2.5556e-01,
        -1.8682e-01, -2.6294e-01, -9.4497e-02, -1.0981e-01, -1.1964e+00,
         9.6917e-01, -7.7178e-01,  7.0624e-01, -8.1397e-01, -1.6968e-01,
        -7.2543e-01, -6.4447e-02,  2.4457e-01,  5.7671e-01, -2.7516e-01,
        -1.0725e-01, -1.1363e+00,  9.7476e-01,  5.7101e-01,  9.5354e-01,
         5.0487e-01,  8.7981e-01, -

98 tensor(72.3950, grad_fn=<MseLossBackward0>) tensor([-2.5430e-01,  1.8072e-01,  1.2652e-01,  3.2804e-01, -2.4440e-01,
        -1.3928e-01, -3.0307e-01, -2.7703e-01, -1.5976e-01, -8.8077e-01,
         6.6133e-01, -2.2503e-01,  3.9601e-01, -6.7702e-01, -4.1408e-01,
        -8.6414e-01,  7.2213e-03, -2.1133e-01,  8.9880e-01, -5.7593e-01,
        -4.9425e-01, -1.3916e+00,  9.4598e-01,  4.9105e-01,  1.2200e+00,
         1.4380e+00,  1.0311e+00, -2.5709e-01,  1.4907e-01, -3.5525e-01,
         6.1660e-01, -3.0981e-01, -1.9732e-01, -4.1752e-01, -9.3903e-01,
        -7.8445e-02, -8.2894e+00, -2.0293e+00, -4.4581e+00, -3.6872e+00]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0

        -0.2400, -0.3093, -0.1061, -0.1661, -6.0437, -1.8537, -3.6758, -2.5305]) tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0475,  0.2595, -0.2087,  0.1037],
       grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
99 tensor(53.6040, grad_fn=<MseLossBackward0>) tensor([-0.0851,  0.0385,  0.0722,  0.2075, -0.1525, -0.1163, -0.1715, -0.1716,
        -0.1672, -0.2695,  0.3893, -0.9661,  0.5759, -0.4012, -0.2815, -0.3967,
        -0.1076,  0.3917,  0.5674, -0.4016, -0.4106, -0.4963,  0.2445, -0.0693,
         0.2442,  0.7905,  1.2199,  0.2028, -0.2496, -0.3884,  0.4438, -0.2437,
        -0.1872, -0.2618, -0.7427,  0.0348, -4.9809, -1.9301, -2.9307, -3.8241])

In [12]:
parameters_to_vector(model.parameters())

tensor([ 0.2235, -0.0973, -0.2852, -0.2029,  0.2218,  0.2281,  0.2715,  0.0543,
         0.2085,  0.1030, -0.2429,  0.2934,  0.2813,  0.2645, -0.0455,  0.2404,
        -0.2080, -0.2315,  0.0014, -0.1183, -0.1677, -0.0731,  0.0520, -0.2141,
         0.3324,  0.0407,  0.2600, -0.1340,  0.0254,  0.2640, -0.2909,  0.2186,
         0.1556, -0.1506,  0.1190,  0.0011,  0.0475,  0.2595, -0.2087,  0.1037],
       grad_fn=<CatBackward0>)

In [13]:
class CNN_Net(nn.Module):
    def __init__(self):
        super(CNN_Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5)
        self.pool = nn.MaxPool2d(2,2)
        self.dropout = nn.Dropout(p=0.2)
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.dropout(x)
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        output = self.fc2(x)
        return output

In [14]:
class ClientUpdate(object):
    def __init__(self, dataset, batchSize, alpha, lamda, epochs, projection_list, projected_weights):
        self.train_loader = DataLoader(MyDataset(dataset["features"], dataset["label"]), batch_size=batchSize, shuffle=True)
        #self.learning_rate = learning_rate
        self.epochs = epochs
        self.batchSize = batchSize

    def train(self, model):
        criterion = nn.MSELoss()
        optimizer = torch.optim.SGD(model.parameters(), lr=1e-3
                                   )

        e_loss = []
        for epoch in range(1, self.epochs+1):
            train_loss = 0
            model.train()
            for i, (data, labels) in zip(range(1), self.train_loader):
                data, labels = data, labels
                optimizer.zero_grad() 
                output = model(data)  
                loss = criterion(output, labels)
                #loss += mu/2 * torch.norm(client_param.data - server_param.data)**2
                loss.backward()
                #grads = grads_to_vector(model.parameters())
                optimizer.step()
                train_loss += loss.item()*data.size(0)
                
                

            train_loss = train_loss/self.batchSize#len(self.train_loader.dataset) 
            e_loss.append(train_loss)

        total_loss = e_loss#sum(e_loss)/len(e_loss)

        return model.state_dict(), total_loss

In [15]:
# Preparing projection matrices
models = [MLP_Net(user_id=i) for i in range(no_users)]
#temp = MLP_Net()
projection_list = []
projected_weights = []

def update_ProjWeight(projection_list, projected_weights, first_run=True):
    #projected_weights = []
    for i in range(no_users):
        neighbors_mat = []
        neighbors_weights = []
        for j in range(no_users):
            if j in G.neighbors(i):
                with torch.no_grad():
                    if first_run == True:
                        row, column = parameters_to_vector(models[j].parameters()).size()[0], parameters_to_vector(models[i].parameters()).size()[0]
                        mat = torch.zeros((row, column))
                        mat.fill_diagonal_(1.0 + 1.0 * float(np.random.randn(1)))
                        neighbors_mat.append(mat)
                        neighbors_weights.append(torch.matmul(mat, parameters_to_vector(models[j].parameters())))
                    else:
                        neighbors_weights.append(torch.matmul(projection_list[j][i], parameters_to_vector(models[j].parameters())))
            else:
                neighbors_mat.append(0)
                neighbors_weights.append(0)
        if first_run == True:
            projection_list.append(neighbors_mat)
        projected_weights.append(neighbors_weights)

update_ProjWeight(projection_list, projected_weights)



In [16]:
print(projection_list[0])

[0, tensor([[1.9948, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 1.9948, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 1.9948,  ..., 0.0000, 0.0000, 0.0000],
        ...,
        [0.0000, 0.0000, 0.0000,  ..., 1.9948, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 1.9948, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 1.9948]]), 0, 0, tensor([[1.1173, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 1.1173, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 1.1173,  ..., 0.0000, 0.0000, 0.0000],
        ...,
        [0.0000, 0.0000, 0.0000,  ..., 1.1173, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 1.1173, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 1.1173]]), 0, 0, 0, 0, 0, 0, 0, 0, 0, tensor([[1.1013, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 1.1013, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 1.1013,  ...

In [17]:
def testing(model, dataset, bs, criterion): 
    test_loss = 0
    correct = 0
    test_loader = DataLoader(MyDataset(dataset["features"], dataset["label"]), batch_size=bs)
    l = len(test_loader)
    model.eval()
    for data, labels in test_loader:
        data, labels = data, labels
        output = model(data)
        loss = criterion(output, labels)
        test_loss += loss.item()*data.size(0)
        #_, pred = torch.max(output, 1)
        #correct += pred.eq(labels.data.view_as(pred)).sum().item()
    
    test_loss /= len(test_loader.dataset)
    
    return test_loss

In [18]:
def rel_error(model):
    return (torch.norm(parameters_to_vector(model.parameters()) - datapoints[model.user_id]['exact_weights']) / torch.norm(datapoints[model.user_id]['exact_weights'])).detach()

In [19]:
model = MLP_Net(user_id=0)

from torch.nn.utils import parameters_to_vector, vector_to_parameters

with torch.no_grad():    
    params = parameters_to_vector(model.parameters())

    print(params)

params *= 2.

vector_to_parameters(parameters=model.parameters(), vec=params)

parameters_to_vector(model.parameters())





tensor([ 0.3054, -0.2010, -0.1876, -0.0028,  0.2577,  0.1404,  0.0177,  0.1863,
        -0.3272,  0.1281, -0.1372,  0.2988, -0.2581, -0.2330,  0.3324,  0.0934,
         0.0263,  0.1237,  0.2444, -0.1767,  0.1775,  0.3122,  0.1055, -0.1585,
         0.0680, -0.3066,  0.3181,  0.2193,  0.0477, -0.0470, -0.2661,  0.1525,
         0.1462,  0.2871,  0.1531, -0.2106,  0.2963,  0.1908, -0.1184, -0.1955])


tensor([ 0.6109, -0.4020, -0.3753, -0.0055,  0.5155,  0.2807,  0.0354,  0.3726,
        -0.6543,  0.2561, -0.2744,  0.5975, -0.5162, -0.4660,  0.6647,  0.1868,
         0.0526,  0.2474,  0.4887, -0.3534,  0.3551,  0.6245,  0.2110, -0.3171,
         0.1361, -0.6132,  0.6363,  0.4387,  0.0955, -0.0940, -0.5323,  0.3050,
         0.2925,  0.5741,  0.3063, -0.4212,  0.5927,  0.3816, -0.2369, -0.3910],
       grad_fn=<CatBackward0>)

In [20]:
#global_model = CNN_Net().cuda()
models = [MLP_Net(user_id=i) for i in range(no_users)]
dummy_models = [MLP_Net(user_id=i) for i in range(no_users)]

#model.load_state_dict(global_model.state_dict())

criterion = nn.MSELoss()


train_loss = []
test_loss = []
test_accuracy = []
total_rel_error = []

for curr_round in tqdm(range(1, it+1)):
    w, local_loss = [], []

    
    for i in range(no_users):
        dummy_models[i].load_state_dict(models[i].state_dict())
        local_update = ClientUpdate(dataset=datapoints[i], batchSize=batch_size, alpha=alpha, lamda=lamda, epochs=1, projection_list=projection_list, projected_weights=projected_weights)
        weights, loss = local_update.train(dummy_models[i])
        w.append(weights)
        local_loss.append(loss)
        models[i].load_state_dict(w[i])
        
    
    
    
    #Share and mix the local weights
    
    
    for i in range(no_users):
        weights = parameters_to_vector(dummy_models[i].parameters())
        mat_vec_sum = torch.zeros_like(weights)
        for j in G.neighbors(i):
            mat_vec_sum = torch.add(mat_vec_sum, parameters_to_vector(dummy_models[j].parameters()))
        
        new_weights = weights - mu * eta * (degree_list[i] * weights - mat_vec_sum)
        # Update real models
        vector_to_parameters(parameters=models[i].parameters(), vec=new_weights)
        
        
        
    
    




          
            

    local_test_acc = []
    local_test_loss = []
    
    user_rel_error = 0
    for k in range(no_users):
      
        g_loss = testing(models[i], datapoints[i], 50, criterion)
        local_test_loss.append(g_loss)
        #user_rel_error += rel_error(models[i])
    
    
        

    g_loss = sum(local_test_loss) / len(local_test_loss)
    #total_rel_error.append(user_rel_error / no_users)
    #g_accuracy = sum(local_test_acc) / len(local_test_acc)
    
    

    test_loss.append(g_loss)
    #test_accuracy.append(g_accuracy)
    print("Training_loss %2.5f"% (test_loss[-1]))

  0%|          | 1/2000 [00:00<13:37,  2.44it/s]

Training_loss 52.32741


  0%|          | 2/2000 [00:00<10:47,  3.09it/s]

Training_loss 52.23904


  0%|          | 3/2000 [00:01<12:18,  2.70it/s]

Training_loss 52.06570


  0%|          | 4/2000 [00:01<11:55,  2.79it/s]

Training_loss 51.92491


  0%|          | 5/2000 [00:01<11:07,  2.99it/s]

Training_loss 51.85653


  0%|          | 6/2000 [00:01<10:11,  3.26it/s]

Training_loss 51.73429


  0%|          | 7/2000 [00:02<09:29,  3.50it/s]

Training_loss 51.64761


  0%|          | 8/2000 [00:02<10:10,  3.26it/s]

Training_loss 51.54896


  0%|          | 9/2000 [00:02<10:27,  3.18it/s]

Training_loss 51.43665


  0%|          | 10/2000 [00:03<11:42,  2.83it/s]

Training_loss 51.34339


  1%|          | 11/2000 [00:03<11:48,  2.81it/s]

Training_loss 51.25119


  1%|          | 12/2000 [00:03<10:33,  3.14it/s]

Training_loss 51.11907


  1%|          | 13/2000 [00:04<09:40,  3.42it/s]

Training_loss 50.98284


  1%|          | 14/2000 [00:04<09:46,  3.39it/s]

Training_loss 50.83908


  1%|          | 15/2000 [00:04<09:06,  3.63it/s]

Training_loss 50.71791


  1%|          | 16/2000 [00:04<09:14,  3.58it/s]

Training_loss 50.60900


  1%|          | 17/2000 [00:05<09:14,  3.58it/s]

Training_loss 50.53671


  1%|          | 18/2000 [00:05<10:59,  3.01it/s]

Training_loss 50.43974


  1%|          | 19/2000 [00:06<11:13,  2.94it/s]

Training_loss 50.33542


  1%|          | 20/2000 [00:06<11:44,  2.81it/s]

Training_loss 50.15958


  1%|          | 21/2000 [00:06<12:48,  2.58it/s]

Training_loss 50.06764


  1%|          | 22/2000 [00:07<11:32,  2.86it/s]

Training_loss 49.98768


  1%|          | 23/2000 [00:07<10:18,  3.20it/s]

Training_loss 49.91177


  1%|          | 24/2000 [00:07<09:32,  3.45it/s]

Training_loss 49.82238


  1%|▏         | 25/2000 [00:07<09:00,  3.65it/s]

Training_loss 49.75655


  1%|▏         | 26/2000 [00:08<08:45,  3.75it/s]

Training_loss 49.65999


  1%|▏         | 27/2000 [00:08<08:16,  3.97it/s]

Training_loss 49.58601


  1%|▏         | 28/2000 [00:08<08:12,  4.00it/s]

Training_loss 49.48862


  1%|▏         | 29/2000 [00:08<07:57,  4.13it/s]

Training_loss 49.36907


  2%|▏         | 30/2000 [00:09<07:46,  4.22it/s]

Training_loss 49.22016


  2%|▏         | 31/2000 [00:09<07:32,  4.35it/s]

Training_loss 49.15748


  2%|▏         | 32/2000 [00:09<08:12,  4.00it/s]

Training_loss 49.03033


  2%|▏         | 33/2000 [00:10<11:06,  2.95it/s]

Training_loss 48.91478


  2%|▏         | 34/2000 [00:10<10:31,  3.11it/s]

Training_loss 48.80093


  2%|▏         | 35/2000 [00:10<11:29,  2.85it/s]

Training_loss 48.70859


  2%|▏         | 36/2000 [00:11<12:17,  2.66it/s]

Training_loss 48.64099


  2%|▏         | 37/2000 [00:11<13:25,  2.44it/s]

Training_loss 48.54208


  2%|▏         | 38/2000 [00:12<14:17,  2.29it/s]

Training_loss 48.42115


  2%|▏         | 39/2000 [00:12<14:36,  2.24it/s]

Training_loss 48.34277


  2%|▏         | 40/2000 [00:13<15:46,  2.07it/s]

Training_loss 48.26433


  2%|▏         | 41/2000 [00:13<16:21,  2.00it/s]

Training_loss 48.18289


  2%|▏         | 42/2000 [00:14<16:37,  1.96it/s]

Training_loss 48.07115


  2%|▏         | 43/2000 [00:14<17:11,  1.90it/s]

Training_loss 47.98116


  2%|▏         | 44/2000 [00:15<16:53,  1.93it/s]

Training_loss 47.84173


  2%|▏         | 45/2000 [00:15<17:05,  1.91it/s]

Training_loss 47.74275


  2%|▏         | 46/2000 [00:16<16:27,  1.98it/s]

Training_loss 47.64305


  2%|▏         | 47/2000 [00:16<13:51,  2.35it/s]

Training_loss 47.54747


  2%|▏         | 48/2000 [00:16<12:08,  2.68it/s]

Training_loss 47.42204


  2%|▏         | 49/2000 [00:17<11:23,  2.86it/s]

Training_loss 47.31537


  2%|▎         | 50/2000 [00:17<11:32,  2.82it/s]

Training_loss 47.17173


  3%|▎         | 51/2000 [00:18<12:43,  2.55it/s]

Training_loss 47.08224


  3%|▎         | 52/2000 [00:18<14:03,  2.31it/s]

Training_loss 46.93007


  3%|▎         | 53/2000 [00:19<14:44,  2.20it/s]

Training_loss 46.76476


  3%|▎         | 54/2000 [00:19<15:18,  2.12it/s]

Training_loss 46.62804


  3%|▎         | 55/2000 [00:20<15:49,  2.05it/s]

Training_loss 46.45732


  3%|▎         | 56/2000 [00:20<15:59,  2.03it/s]

Training_loss 46.37834


  3%|▎         | 57/2000 [00:21<16:28,  1.96it/s]

Training_loss 46.24551


  3%|▎         | 58/2000 [00:21<17:05,  1.89it/s]

Training_loss 46.10523


  3%|▎         | 59/2000 [00:22<17:18,  1.87it/s]

Training_loss 45.99553


  3%|▎         | 60/2000 [00:22<17:29,  1.85it/s]

Training_loss 45.85909


  3%|▎         | 61/2000 [00:23<17:59,  1.80it/s]

Training_loss 45.75398


  3%|▎         | 62/2000 [00:23<17:53,  1.81it/s]

Training_loss 45.62123


  3%|▎         | 63/2000 [00:24<19:24,  1.66it/s]

Training_loss 45.45851


  3%|▎         | 64/2000 [00:25<19:59,  1.61it/s]

Training_loss 45.38069


  3%|▎         | 65/2000 [00:25<19:10,  1.68it/s]

Training_loss 45.27795


  3%|▎         | 66/2000 [00:26<18:48,  1.71it/s]

Training_loss 45.17165


  3%|▎         | 67/2000 [00:27<18:41,  1.72it/s]

Training_loss 45.04419


  3%|▎         | 68/2000 [00:27<18:14,  1.77it/s]

Training_loss 44.84323


  3%|▎         | 69/2000 [00:28<18:04,  1.78it/s]

Training_loss 44.73217


  4%|▎         | 70/2000 [00:28<17:43,  1.81it/s]

Training_loss 44.55388


  4%|▎         | 71/2000 [00:29<17:44,  1.81it/s]

Training_loss 44.45833


  4%|▎         | 72/2000 [00:29<16:59,  1.89it/s]

Training_loss 44.33871


  4%|▎         | 73/2000 [00:30<16:31,  1.94it/s]

Training_loss 44.20127


  4%|▎         | 74/2000 [00:30<16:40,  1.92it/s]

Training_loss 44.07018


  4%|▍         | 75/2000 [00:31<16:44,  1.92it/s]

Training_loss 43.94983


  4%|▍         | 76/2000 [00:31<16:28,  1.95it/s]

Training_loss 43.88439


  4%|▍         | 77/2000 [00:32<16:47,  1.91it/s]

Training_loss 43.75758


  4%|▍         | 78/2000 [00:32<14:19,  2.24it/s]

Training_loss 43.66329


  4%|▍         | 79/2000 [00:32<12:26,  2.57it/s]

Training_loss 43.57219


  4%|▍         | 80/2000 [00:33<11:11,  2.86it/s]

Training_loss 43.47983


  4%|▍         | 81/2000 [00:33<10:32,  3.03it/s]

Training_loss 43.34400


  4%|▍         | 82/2000 [00:33<12:13,  2.62it/s]

Training_loss 43.20194


  4%|▍         | 83/2000 [00:34<13:13,  2.41it/s]

Training_loss 43.08154


  4%|▍         | 84/2000 [00:34<14:31,  2.20it/s]

Training_loss 43.00786


  4%|▍         | 85/2000 [00:35<14:59,  2.13it/s]

Training_loss 42.89635


  4%|▍         | 86/2000 [00:35<15:38,  2.04it/s]

Training_loss 42.72831


  4%|▍         | 87/2000 [00:36<15:12,  2.10it/s]

Training_loss 42.61029


  4%|▍         | 88/2000 [00:36<15:56,  2.00it/s]

Training_loss 42.48700


  4%|▍         | 89/2000 [00:37<16:13,  1.96it/s]

Training_loss 42.36600


  4%|▍         | 90/2000 [00:37<16:12,  1.96it/s]

Training_loss 42.25136


  5%|▍         | 91/2000 [00:38<16:09,  1.97it/s]

Training_loss 42.10570


  5%|▍         | 92/2000 [00:38<16:18,  1.95it/s]

Training_loss 41.97237


  5%|▍         | 93/2000 [00:39<17:04,  1.86it/s]

Training_loss 41.86882


  5%|▍         | 94/2000 [00:40<17:16,  1.84it/s]

Training_loss 41.80203


  5%|▍         | 95/2000 [00:40<17:32,  1.81it/s]

Training_loss 41.72145


  5%|▍         | 96/2000 [00:41<17:02,  1.86it/s]

Training_loss 41.67929


  5%|▍         | 97/2000 [00:41<16:43,  1.90it/s]

Training_loss 41.58983


  5%|▍         | 98/2000 [00:42<16:58,  1.87it/s]

Training_loss 41.48632


  5%|▍         | 99/2000 [00:42<17:01,  1.86it/s]

Training_loss 41.35642


  5%|▌         | 100/2000 [00:43<17:02,  1.86it/s]

Training_loss 41.28053


  5%|▌         | 101/2000 [00:43<17:05,  1.85it/s]

Training_loss 41.19597


  5%|▌         | 102/2000 [00:44<16:28,  1.92it/s]

Training_loss 41.07002


  5%|▌         | 103/2000 [00:44<16:24,  1.93it/s]

Training_loss 40.97368


  5%|▌         | 104/2000 [00:45<16:35,  1.90it/s]

Training_loss 40.85313


  5%|▌         | 105/2000 [00:45<16:35,  1.90it/s]

Training_loss 40.79821


  5%|▌         | 106/2000 [00:46<16:33,  1.91it/s]

Training_loss 40.70676


  5%|▌         | 107/2000 [00:46<16:10,  1.95it/s]

Training_loss 40.60597


  5%|▌         | 108/2000 [00:47<16:15,  1.94it/s]

Training_loss 40.47264


  5%|▌         | 109/2000 [00:48<16:22,  1.93it/s]

Training_loss 40.44042


  6%|▌         | 110/2000 [00:48<15:10,  2.08it/s]

Training_loss 40.37340


  6%|▌         | 111/2000 [00:48<12:57,  2.43it/s]

Training_loss 40.29025


  6%|▌         | 112/2000 [00:48<11:33,  2.72it/s]

Training_loss 40.19073


  6%|▌         | 113/2000 [00:49<10:50,  2.90it/s]

Training_loss 40.09167


  6%|▌         | 114/2000 [00:49<11:13,  2.80it/s]

Training_loss 40.00986


  6%|▌         | 115/2000 [00:50<12:25,  2.53it/s]

Training_loss 39.94397


  6%|▌         | 116/2000 [00:50<13:28,  2.33it/s]

Training_loss 39.84831


  6%|▌         | 117/2000 [00:51<14:01,  2.24it/s]

Training_loss 39.78461


  6%|▌         | 118/2000 [00:51<14:45,  2.13it/s]

Training_loss 39.73348


  6%|▌         | 119/2000 [00:52<14:57,  2.10it/s]

Training_loss 39.69379


  6%|▌         | 120/2000 [00:52<15:20,  2.04it/s]

Training_loss 39.58506


  6%|▌         | 121/2000 [00:53<15:32,  2.02it/s]

Training_loss 39.52001


  6%|▌         | 122/2000 [00:53<15:30,  2.02it/s]

Training_loss 39.42876


  6%|▌         | 123/2000 [00:54<15:45,  1.98it/s]

Training_loss 39.34969


  6%|▌         | 124/2000 [00:54<15:51,  1.97it/s]

Training_loss 39.26827


  6%|▋         | 125/2000 [00:55<16:26,  1.90it/s]

Training_loss 39.19168


  6%|▋         | 126/2000 [00:55<15:50,  1.97it/s]

Training_loss 39.12816


  6%|▋         | 127/2000 [00:56<15:59,  1.95it/s]

Training_loss 39.07733


  6%|▋         | 128/2000 [00:56<15:54,  1.96it/s]

Training_loss 38.98229


  6%|▋         | 129/2000 [00:57<15:59,  1.95it/s]

Training_loss 38.91624


  6%|▋         | 130/2000 [00:57<15:55,  1.96it/s]

Training_loss 38.83702


  7%|▋         | 131/2000 [00:58<16:03,  1.94it/s]

Training_loss 38.78851


  7%|▋         | 132/2000 [00:58<15:27,  2.01it/s]

Training_loss 38.71171


  7%|▋         | 133/2000 [00:59<15:35,  2.00it/s]

Training_loss 38.63178


  7%|▋         | 134/2000 [00:59<15:45,  1.97it/s]

Training_loss 38.54459


  7%|▋         | 135/2000 [01:00<16:19,  1.90it/s]

Training_loss 38.47135


  7%|▋         | 136/2000 [01:00<16:33,  1.88it/s]

Training_loss 38.39292


  7%|▋         | 137/2000 [01:01<16:49,  1.85it/s]

Training_loss 38.35517


  7%|▋         | 138/2000 [01:01<16:03,  1.93it/s]

Training_loss 38.29454


  7%|▋         | 139/2000 [01:02<16:01,  1.94it/s]

Training_loss 38.20751


  7%|▋         | 140/2000 [01:02<16:06,  1.93it/s]

Training_loss 38.14543


  7%|▋         | 141/2000 [01:03<15:56,  1.94it/s]

Training_loss 38.03333


  7%|▋         | 142/2000 [01:03<15:58,  1.94it/s]

Training_loss 37.96095


  7%|▋         | 143/2000 [01:04<15:22,  2.01it/s]

Training_loss 37.88749


  7%|▋         | 144/2000 [01:04<12:42,  2.43it/s]

Training_loss 37.83755


  7%|▋         | 145/2000 [01:04<11:28,  2.69it/s]

Training_loss 37.80729


  7%|▋         | 146/2000 [01:05<10:47,  2.86it/s]

Training_loss 37.76014


  7%|▋         | 147/2000 [01:05<10:59,  2.81it/s]

Training_loss 37.72583


  7%|▋         | 148/2000 [01:06<12:51,  2.40it/s]

Training_loss 37.68456


  7%|▋         | 149/2000 [01:06<14:01,  2.20it/s]

Training_loss 37.69225


  8%|▊         | 150/2000 [01:07<14:16,  2.16it/s]

Training_loss 37.64466


  8%|▊         | 151/2000 [01:07<14:35,  2.11it/s]

Training_loss 37.55265


  8%|▊         | 152/2000 [01:08<14:50,  2.08it/s]

Training_loss 37.47947


  8%|▊         | 153/2000 [01:08<15:33,  1.98it/s]

Training_loss 37.44588


  8%|▊         | 154/2000 [01:09<15:51,  1.94it/s]

Training_loss 37.40916


  8%|▊         | 155/2000 [01:09<15:37,  1.97it/s]

Training_loss 37.34148


  8%|▊         | 156/2000 [01:10<15:20,  2.00it/s]

Training_loss 37.28236


  8%|▊         | 157/2000 [01:10<15:26,  1.99it/s]

Training_loss 37.20903


  8%|▊         | 158/2000 [01:11<15:39,  1.96it/s]

Training_loss 37.15378


  8%|▊         | 159/2000 [01:11<15:42,  1.95it/s]

Training_loss 37.11512


  8%|▊         | 160/2000 [01:12<15:37,  1.96it/s]

Training_loss 37.03255


  8%|▊         | 161/2000 [01:12<15:17,  2.00it/s]

Training_loss 36.96404


  8%|▊         | 162/2000 [01:13<16:01,  1.91it/s]

Training_loss 36.91856


  8%|▊         | 163/2000 [01:13<15:52,  1.93it/s]

Training_loss 36.89582


  8%|▊         | 164/2000 [01:14<15:49,  1.93it/s]

Training_loss 36.84985


  8%|▊         | 165/2000 [01:14<15:25,  1.98it/s]

Training_loss 36.82977


  8%|▊         | 166/2000 [01:15<15:37,  1.96it/s]

Training_loss 36.79684


  8%|▊         | 167/2000 [01:15<15:33,  1.96it/s]

Training_loss 36.77821


  8%|▊         | 168/2000 [01:16<15:36,  1.96it/s]

Training_loss 36.75034


  8%|▊         | 169/2000 [01:16<15:14,  2.00it/s]

Training_loss 36.71248


  8%|▊         | 170/2000 [01:17<15:32,  1.96it/s]

Training_loss 36.61221


  9%|▊         | 171/2000 [01:17<15:27,  1.97it/s]

Training_loss 36.57047


  9%|▊         | 172/2000 [01:18<15:25,  1.98it/s]

Training_loss 36.53760


  9%|▊         | 173/2000 [01:18<15:04,  2.02it/s]

Training_loss 36.50270


  9%|▊         | 174/2000 [01:19<15:01,  2.02it/s]

Training_loss 36.45971


  9%|▉         | 175/2000 [01:19<15:25,  1.97it/s]

Training_loss 36.41585


  9%|▉         | 176/2000 [01:20<15:19,  1.98it/s]

Training_loss 36.35776


  9%|▉         | 177/2000 [01:20<12:44,  2.38it/s]

Training_loss 36.31333


  9%|▉         | 178/2000 [01:20<11:07,  2.73it/s]

Training_loss 36.25909


  9%|▉         | 179/2000 [01:21<10:07,  3.00it/s]

Training_loss 36.23981


  9%|▉         | 180/2000 [01:21<10:03,  3.01it/s]

Training_loss 36.22507


  9%|▉         | 181/2000 [01:21<11:46,  2.58it/s]

Training_loss 36.17810


  9%|▉         | 182/2000 [01:22<12:56,  2.34it/s]

Training_loss 36.14396


  9%|▉         | 183/2000 [01:22<13:33,  2.23it/s]

Training_loss 36.10530


  9%|▉         | 184/2000 [01:23<14:20,  2.11it/s]

Training_loss 36.07146


  9%|▉         | 185/2000 [01:23<14:18,  2.12it/s]

Training_loss 36.01585


  9%|▉         | 186/2000 [01:24<14:40,  2.06it/s]

Training_loss 35.98061


  9%|▉         | 187/2000 [01:24<14:37,  2.07it/s]

Training_loss 35.93528


  9%|▉         | 188/2000 [01:25<14:41,  2.06it/s]

Training_loss 35.89524


  9%|▉         | 189/2000 [01:25<14:48,  2.04it/s]

Training_loss 35.87959


 10%|▉         | 190/2000 [01:26<15:07,  2.00it/s]

Training_loss 35.81885


 10%|▉         | 191/2000 [01:27<15:18,  1.97it/s]

Training_loss 35.80438


 10%|▉         | 192/2000 [01:27<15:26,  1.95it/s]

Training_loss 35.76171


 10%|▉         | 193/2000 [01:28<15:28,  1.95it/s]

Training_loss 35.73368


 10%|▉         | 194/2000 [01:28<15:26,  1.95it/s]

Training_loss 35.71042


 10%|▉         | 195/2000 [01:29<15:20,  1.96it/s]

Training_loss 35.67747


 10%|▉         | 196/2000 [01:29<15:26,  1.95it/s]

Training_loss 35.67420


 10%|▉         | 197/2000 [01:30<15:38,  1.92it/s]

Training_loss 35.63116


 10%|▉         | 198/2000 [01:30<15:14,  1.97it/s]

Training_loss 35.61327


 10%|▉         | 199/2000 [01:31<15:18,  1.96it/s]

Training_loss 35.55389


 10%|█         | 200/2000 [01:31<15:37,  1.92it/s]

Training_loss 35.52184


 10%|█         | 201/2000 [01:32<15:42,  1.91it/s]

Training_loss 35.44377


 10%|█         | 202/2000 [01:32<15:11,  1.97it/s]

Training_loss 35.39874


 10%|█         | 203/2000 [01:33<15:11,  1.97it/s]

Training_loss 35.34703


 10%|█         | 204/2000 [01:33<14:41,  2.04it/s]

Training_loss 35.28514


 10%|█         | 205/2000 [01:34<14:46,  2.03it/s]

Training_loss 35.22914


 10%|█         | 206/2000 [01:34<14:50,  2.01it/s]

Training_loss 35.19801


 10%|█         | 207/2000 [01:35<15:27,  1.93it/s]

Training_loss 35.14129


 10%|█         | 208/2000 [01:35<15:39,  1.91it/s]

Training_loss 35.11684


 10%|█         | 209/2000 [01:36<15:31,  1.92it/s]

Training_loss 35.04924


 10%|█         | 210/2000 [01:36<13:31,  2.21it/s]

Training_loss 35.01949


 11%|█         | 211/2000 [01:36<11:39,  2.56it/s]

Training_loss 34.99627


 11%|█         | 212/2000 [01:37<10:18,  2.89it/s]

Training_loss 34.97837


 11%|█         | 213/2000 [01:37<10:30,  2.83it/s]

Training_loss 34.95116


 11%|█         | 214/2000 [01:37<11:46,  2.53it/s]

Training_loss 34.91038


 11%|█         | 215/2000 [01:38<12:14,  2.43it/s]

Training_loss 34.86475


 11%|█         | 216/2000 [01:38<12:59,  2.29it/s]

Training_loss 34.80161


 11%|█         | 217/2000 [01:39<13:52,  2.14it/s]

Training_loss 34.77980


 11%|█         | 218/2000 [01:39<14:20,  2.07it/s]

Training_loss 34.74827


 11%|█         | 219/2000 [01:40<15:02,  1.97it/s]

Training_loss 34.72453


 11%|█         | 220/2000 [01:41<15:44,  1.88it/s]

Training_loss 34.69086


 11%|█         | 221/2000 [01:41<16:05,  1.84it/s]

Training_loss 34.65225


 11%|█         | 222/2000 [01:42<15:38,  1.89it/s]

Training_loss 34.63719


 11%|█         | 223/2000 [01:42<15:07,  1.96it/s]

Training_loss 34.57888


 11%|█         | 224/2000 [01:43<15:27,  1.92it/s]

Training_loss 34.53344


 11%|█▏        | 225/2000 [01:43<15:45,  1.88it/s]

Training_loss 34.53262


 11%|█▏        | 226/2000 [01:44<15:28,  1.91it/s]

Training_loss 34.49061


 11%|█▏        | 227/2000 [01:44<15:18,  1.93it/s]

Training_loss 34.45364


 11%|█▏        | 228/2000 [01:45<15:20,  1.92it/s]

Training_loss 34.40684


 11%|█▏        | 229/2000 [01:45<15:25,  1.91it/s]

Training_loss 34.34921


 12%|█▏        | 230/2000 [01:46<15:04,  1.96it/s]

Training_loss 34.29563


 12%|█▏        | 231/2000 [01:46<14:56,  1.97it/s]

Training_loss 34.25695


 12%|█▏        | 232/2000 [01:47<14:44,  2.00it/s]

Training_loss 34.21208


 12%|█▏        | 233/2000 [01:47<15:22,  1.92it/s]

Training_loss 34.17007


 12%|█▏        | 234/2000 [01:48<15:32,  1.89it/s]

Training_loss 34.12885


 12%|█▏        | 235/2000 [01:48<15:19,  1.92it/s]

Training_loss 34.08308


 12%|█▏        | 236/2000 [01:49<15:03,  1.95it/s]

Training_loss 34.05658


 12%|█▏        | 237/2000 [01:49<15:35,  1.88it/s]

Training_loss 34.00745


 12%|█▏        | 238/2000 [01:50<15:28,  1.90it/s]

Training_loss 33.95745


 12%|█▏        | 239/2000 [01:50<15:02,  1.95it/s]

Training_loss 33.90218


 12%|█▏        | 240/2000 [01:51<14:43,  1.99it/s]

Training_loss 33.82998


 12%|█▏        | 241/2000 [01:51<14:45,  1.99it/s]

Training_loss 33.81370


 12%|█▏        | 242/2000 [01:52<14:53,  1.97it/s]

Training_loss 33.77867


 12%|█▏        | 243/2000 [01:52<12:39,  2.31it/s]

Training_loss 33.72921


 12%|█▏        | 244/2000 [01:52<10:58,  2.67it/s]

Training_loss 33.67774


 12%|█▏        | 245/2000 [01:53<09:52,  2.96it/s]

Training_loss 33.64661


 12%|█▏        | 246/2000 [01:53<09:54,  2.95it/s]

Training_loss 33.63261


 12%|█▏        | 247/2000 [01:53<11:16,  2.59it/s]

Training_loss 33.60985


 12%|█▏        | 248/2000 [01:54<12:20,  2.37it/s]

Training_loss 33.56692


 12%|█▏        | 249/2000 [01:54<12:49,  2.27it/s]

Training_loss 33.54039


 12%|█▎        | 250/2000 [01:55<13:18,  2.19it/s]

Training_loss 33.52977


 13%|█▎        | 251/2000 [01:56<14:09,  2.06it/s]

Training_loss 33.49769


 13%|█▎        | 252/2000 [01:56<14:09,  2.06it/s]

Training_loss 33.45068


 13%|█▎        | 253/2000 [01:57<14:26,  2.02it/s]

Training_loss 33.39763


 13%|█▎        | 254/2000 [01:57<14:38,  1.99it/s]

Training_loss 33.35442


 13%|█▎        | 255/2000 [01:58<15:03,  1.93it/s]

Training_loss 33.33183


 13%|█▎        | 256/2000 [01:58<14:35,  1.99it/s]

Training_loss 33.27038


 13%|█▎        | 257/2000 [01:58<12:34,  2.31it/s]

Training_loss 33.24031


 13%|█▎        | 258/2000 [01:59<11:10,  2.60it/s]

Training_loss 33.21268


 13%|█▎        | 259/2000 [01:59<12:05,  2.40it/s]

Training_loss 33.17605


 13%|█▎        | 260/2000 [02:00<12:40,  2.29it/s]

Training_loss 33.14305


 13%|█▎        | 261/2000 [02:00<12:25,  2.33it/s]

Training_loss 33.12155


 13%|█▎        | 262/2000 [02:00<12:24,  2.34it/s]

Training_loss 33.11001


 13%|█▎        | 263/2000 [02:01<12:13,  2.37it/s]

Training_loss 33.07339


 13%|█▎        | 264/2000 [02:01<11:54,  2.43it/s]

Training_loss 33.03480


 13%|█▎        | 265/2000 [02:02<12:30,  2.31it/s]

Training_loss 33.02110


 13%|█▎        | 266/2000 [02:02<13:58,  2.07it/s]

Training_loss 32.99555


 13%|█▎        | 267/2000 [02:03<14:32,  1.99it/s]

Training_loss 32.98294


 13%|█▎        | 268/2000 [02:03<15:02,  1.92it/s]

Training_loss 32.94679


 13%|█▎        | 269/2000 [02:04<15:53,  1.82it/s]

Training_loss 32.90963


 14%|█▎        | 270/2000 [02:05<16:29,  1.75it/s]

Training_loss 32.89685


 14%|█▎        | 271/2000 [02:05<15:41,  1.84it/s]

Training_loss 32.87475


 14%|█▎        | 272/2000 [02:06<14:25,  2.00it/s]

Training_loss 32.83780


 14%|█▎        | 273/2000 [02:06<14:40,  1.96it/s]

Training_loss 32.80244


 14%|█▎        | 274/2000 [02:07<15:34,  1.85it/s]

Training_loss 32.74972


 14%|█▍        | 275/2000 [02:07<15:38,  1.84it/s]

Training_loss 32.69043


 14%|█▍        | 276/2000 [02:08<16:11,  1.77it/s]

Training_loss 32.63691


 14%|█▍        | 277/2000 [02:08<14:13,  2.02it/s]

Training_loss 32.60780


 14%|█▍        | 278/2000 [02:09<13:00,  2.21it/s]

Training_loss 32.56451


 14%|█▍        | 279/2000 [02:09<11:48,  2.43it/s]

Training_loss 32.53253


 14%|█▍        | 280/2000 [02:09<11:28,  2.50it/s]

Training_loss 32.47386


 14%|█▍        | 281/2000 [02:10<11:54,  2.41it/s]

Training_loss 32.43868


 14%|█▍        | 282/2000 [02:10<13:25,  2.13it/s]

Training_loss 32.40427


 14%|█▍        | 283/2000 [02:11<14:17,  2.00it/s]

Training_loss 32.37978


 14%|█▍        | 284/2000 [02:11<14:53,  1.92it/s]

Training_loss 32.35493


 14%|█▍        | 285/2000 [02:12<15:24,  1.85it/s]

Training_loss 32.33440


 14%|█▍        | 286/2000 [02:12<13:47,  2.07it/s]

Training_loss 32.30587


 14%|█▍        | 287/2000 [02:13<12:46,  2.23it/s]

Training_loss 32.27734


 14%|█▍        | 288/2000 [02:13<12:58,  2.20it/s]

Training_loss 32.26533


 14%|█▍        | 289/2000 [02:14<13:59,  2.04it/s]

Training_loss 32.21598


 14%|█▍        | 290/2000 [02:14<14:33,  1.96it/s]

Training_loss 32.16163


 15%|█▍        | 291/2000 [02:15<14:52,  1.91it/s]

Training_loss 32.12611


 15%|█▍        | 292/2000 [02:15<15:10,  1.87it/s]

Training_loss 32.09119


 15%|█▍        | 293/2000 [02:16<14:40,  1.94it/s]

Training_loss 32.04569


 15%|█▍        | 294/2000 [02:16<14:55,  1.91it/s]

Training_loss 32.01582


 15%|█▍        | 295/2000 [02:17<15:16,  1.86it/s]

Training_loss 31.97406


 15%|█▍        | 296/2000 [02:18<15:23,  1.85it/s]

Training_loss 31.93925


 15%|█▍        | 297/2000 [02:18<15:12,  1.87it/s]

Training_loss 31.91974


 15%|█▍        | 298/2000 [02:19<15:25,  1.84it/s]

Training_loss 31.90095


 15%|█▍        | 299/2000 [02:19<15:20,  1.85it/s]

Training_loss 31.84482


 15%|█▌        | 300/2000 [02:20<14:57,  1.89it/s]

Training_loss 31.84561


 15%|█▌        | 301/2000 [02:20<14:59,  1.89it/s]

Training_loss 31.78154


 15%|█▌        | 302/2000 [02:21<15:08,  1.87it/s]

Training_loss 31.72657


 15%|█▌        | 303/2000 [02:21<15:02,  1.88it/s]

Training_loss 31.68945


 15%|█▌        | 304/2000 [02:22<14:30,  1.95it/s]

Training_loss 31.64563


 15%|█▌        | 305/2000 [02:22<14:15,  1.98it/s]

Training_loss 31.62577


 15%|█▌        | 306/2000 [02:23<14:11,  1.99it/s]

Training_loss 31.58600


 15%|█▌        | 307/2000 [02:23<14:02,  2.01it/s]

Training_loss 31.56741


 15%|█▌        | 308/2000 [02:24<13:58,  2.02it/s]

Training_loss 31.50054


 15%|█▌        | 309/2000 [02:24<12:48,  2.20it/s]

Training_loss 31.47539


 16%|█▌        | 310/2000 [02:24<10:59,  2.56it/s]

Training_loss 31.44118


 16%|█▌        | 311/2000 [02:25<10:21,  2.72it/s]

Training_loss 31.38364


 16%|█▌        | 312/2000 [02:25<09:55,  2.83it/s]

Training_loss 31.35951


 16%|█▌        | 313/2000 [02:25<10:09,  2.77it/s]

Training_loss 31.30334


 16%|█▌        | 314/2000 [02:26<11:48,  2.38it/s]

Training_loss 31.28780


 16%|█▌        | 315/2000 [02:26<12:57,  2.17it/s]

Training_loss 31.25615


 16%|█▌        | 316/2000 [02:27<13:24,  2.09it/s]

Training_loss 31.22269


 16%|█▌        | 317/2000 [02:27<14:00,  2.00it/s]

Training_loss 31.17914


 16%|█▌        | 318/2000 [02:28<14:24,  1.95it/s]

Training_loss 31.14401


 16%|█▌        | 319/2000 [02:29<14:50,  1.89it/s]

Training_loss 31.12194


 16%|█▌        | 320/2000 [02:29<15:00,  1.86it/s]

Training_loss 31.09003


 16%|█▌        | 321/2000 [02:30<14:45,  1.90it/s]

Training_loss 31.03976


 16%|█▌        | 322/2000 [02:30<14:45,  1.89it/s]

Training_loss 31.00298


 16%|█▌        | 323/2000 [02:31<14:50,  1.88it/s]

Training_loss 30.92796


 16%|█▌        | 324/2000 [02:31<14:48,  1.89it/s]

Training_loss 30.87262


 16%|█▋        | 325/2000 [02:32<14:59,  1.86it/s]

Training_loss 30.85568


 16%|█▋        | 326/2000 [02:32<14:44,  1.89it/s]

Training_loss 30.81194


 16%|█▋        | 327/2000 [02:33<15:00,  1.86it/s]

Training_loss 30.76673


 16%|█▋        | 328/2000 [02:33<15:15,  1.83it/s]

Training_loss 30.74699


 16%|█▋        | 329/2000 [02:34<15:01,  1.85it/s]

Training_loss 30.71417


 16%|█▋        | 330/2000 [02:34<14:47,  1.88it/s]

Training_loss 30.65653


 17%|█▋        | 331/2000 [02:35<14:25,  1.93it/s]

Training_loss 30.64155


 17%|█▋        | 332/2000 [02:36<14:34,  1.91it/s]

Training_loss 30.57687


 17%|█▋        | 333/2000 [02:36<14:51,  1.87it/s]

Training_loss 30.52641


 17%|█▋        | 334/2000 [02:37<14:30,  1.91it/s]

Training_loss 30.45577


 17%|█▋        | 335/2000 [02:37<14:43,  1.88it/s]

Training_loss 30.42993


 17%|█▋        | 336/2000 [02:38<14:29,  1.91it/s]

Training_loss 30.41820


 17%|█▋        | 337/2000 [02:38<14:22,  1.93it/s]

Training_loss 30.36650


 17%|█▋        | 338/2000 [02:39<14:34,  1.90it/s]

Training_loss 30.31809


 17%|█▋        | 339/2000 [02:39<14:33,  1.90it/s]

Training_loss 30.27467


 17%|█▋        | 340/2000 [02:40<14:51,  1.86it/s]

Training_loss 30.21025


 17%|█▋        | 341/2000 [02:40<12:58,  2.13it/s]

Training_loss 30.15817


 17%|█▋        | 342/2000 [02:40<11:09,  2.48it/s]

Training_loss 30.12630


 17%|█▋        | 343/2000 [02:41<10:16,  2.69it/s]

Training_loss 30.09903


 17%|█▋        | 344/2000 [02:41<09:51,  2.80it/s]

Training_loss 30.04115


 17%|█▋        | 345/2000 [02:41<11:00,  2.51it/s]

Training_loss 30.00861


 17%|█▋        | 346/2000 [02:42<12:19,  2.24it/s]

Training_loss 29.97479


 17%|█▋        | 347/2000 [02:43<12:57,  2.13it/s]

Training_loss 29.95367


 17%|█▋        | 348/2000 [02:43<13:11,  2.09it/s]

Training_loss 29.90490


 17%|█▋        | 349/2000 [02:44<13:33,  2.03it/s]

Training_loss 29.83731


 18%|█▊        | 350/2000 [02:44<13:53,  1.98it/s]

Training_loss 29.79024


 18%|█▊        | 351/2000 [02:45<14:15,  1.93it/s]

Training_loss 29.74984


 18%|█▊        | 352/2000 [02:45<14:09,  1.94it/s]

Training_loss 29.70515


 18%|█▊        | 353/2000 [02:46<14:36,  1.88it/s]

Training_loss 29.66565


 18%|█▊        | 354/2000 [02:46<14:28,  1.90it/s]

Training_loss 29.60856


 18%|█▊        | 355/2000 [02:47<14:20,  1.91it/s]

Training_loss 29.55255


 18%|█▊        | 356/2000 [02:47<14:30,  1.89it/s]

Training_loss 29.48807


 18%|█▊        | 357/2000 [02:48<14:44,  1.86it/s]

Training_loss 29.44813


 18%|█▊        | 358/2000 [02:48<14:51,  1.84it/s]

Training_loss 29.34004


 18%|█▊        | 359/2000 [02:49<14:33,  1.88it/s]

Training_loss 29.30381


 18%|█▊        | 360/2000 [02:49<14:39,  1.86it/s]

Training_loss 29.25112


 18%|█▊        | 361/2000 [02:50<14:23,  1.90it/s]

Training_loss 29.21472


 18%|█▊        | 362/2000 [02:51<14:59,  1.82it/s]

Training_loss 29.18825


 18%|█▊        | 363/2000 [02:51<15:20,  1.78it/s]

Training_loss 29.15051


 18%|█▊        | 364/2000 [02:52<15:23,  1.77it/s]

Training_loss 29.11193


 18%|█▊        | 365/2000 [02:52<13:34,  2.01it/s]

Training_loss 29.08429


 18%|█▊        | 366/2000 [02:52<12:09,  2.24it/s]

Training_loss 29.03598


 18%|█▊        | 367/2000 [02:53<12:27,  2.19it/s]

Training_loss 28.99585


 18%|█▊        | 368/2000 [02:53<13:09,  2.07it/s]

Training_loss 28.95639


 18%|█▊        | 369/2000 [02:54<13:31,  2.01it/s]

Training_loss 28.91082


 18%|█▊        | 370/2000 [02:54<13:46,  1.97it/s]

Training_loss 28.89345


 19%|█▊        | 371/2000 [02:55<13:40,  1.99it/s]

Training_loss 28.80383


 19%|█▊        | 372/2000 [02:55<13:51,  1.96it/s]

Training_loss 28.78109


 19%|█▊        | 373/2000 [02:56<13:17,  2.04it/s]

Training_loss 28.72686


 19%|█▊        | 374/2000 [02:56<11:27,  2.37it/s]

Training_loss 28.69047


 19%|█▉        | 375/2000 [02:56<10:05,  2.69it/s]

Training_loss 28.65923


 19%|█▉        | 376/2000 [02:57<09:04,  2.98it/s]

Training_loss 28.62755


 19%|█▉        | 377/2000 [02:57<09:20,  2.89it/s]

Training_loss 28.56574


 19%|█▉        | 378/2000 [02:58<10:54,  2.48it/s]

Training_loss 28.50637


 19%|█▉        | 379/2000 [02:58<11:40,  2.31it/s]

Training_loss 28.44364


 19%|█▉        | 380/2000 [02:59<12:10,  2.22it/s]

Training_loss 28.40134


 19%|█▉        | 381/2000 [02:59<12:27,  2.17it/s]

Training_loss 28.34223


 19%|█▉        | 382/2000 [03:00<13:07,  2.05it/s]

Training_loss 28.29686


 19%|█▉        | 383/2000 [03:00<13:11,  2.04it/s]

Training_loss 28.26292


 19%|█▉        | 384/2000 [03:01<13:34,  1.98it/s]

Training_loss 28.21158


 19%|█▉        | 385/2000 [03:01<13:22,  2.01it/s]

Training_loss 28.14929


 19%|█▉        | 386/2000 [03:02<13:50,  1.94it/s]

Training_loss 28.14443


 19%|█▉        | 387/2000 [03:02<14:06,  1.91it/s]

Training_loss 28.11320


 19%|█▉        | 388/2000 [03:03<14:02,  1.91it/s]

Training_loss 28.03945


 19%|█▉        | 389/2000 [03:03<14:04,  1.91it/s]

Training_loss 27.97704


 20%|█▉        | 390/2000 [03:04<13:41,  1.96it/s]

Training_loss 27.92661


 20%|█▉        | 391/2000 [03:04<14:03,  1.91it/s]

Training_loss 27.87531


 20%|█▉        | 392/2000 [03:05<14:25,  1.86it/s]

Training_loss 27.83568


 20%|█▉        | 393/2000 [03:05<14:39,  1.83it/s]

Training_loss 27.79263


 20%|█▉        | 394/2000 [03:06<14:44,  1.82it/s]

Training_loss 27.74214


 20%|█▉        | 395/2000 [03:07<14:37,  1.83it/s]

Training_loss 27.71515


 20%|█▉        | 396/2000 [03:07<14:31,  1.84it/s]

Training_loss 27.68307


 20%|█▉        | 397/2000 [03:08<13:53,  1.92it/s]

Training_loss 27.59846


 20%|█▉        | 398/2000 [03:08<13:46,  1.94it/s]

Training_loss 27.53216


 20%|█▉        | 399/2000 [03:09<14:02,  1.90it/s]

Training_loss 27.47387


 20%|██        | 400/2000 [03:09<14:56,  1.79it/s]

Training_loss 27.40788


 20%|██        | 401/2000 [03:10<15:31,  1.72it/s]

Training_loss 27.37522


 20%|██        | 402/2000 [03:11<16:09,  1.65it/s]

Training_loss 27.32110


 20%|██        | 403/2000 [03:11<16:06,  1.65it/s]

Training_loss 27.26715


 20%|██        | 404/2000 [03:12<15:47,  1.69it/s]

Training_loss 27.23893


 20%|██        | 405/2000 [03:12<13:49,  1.92it/s]

Training_loss 27.19183


 20%|██        | 406/2000 [03:12<11:50,  2.24it/s]

Training_loss 27.13021


 20%|██        | 407/2000 [03:13<10:48,  2.46it/s]

Training_loss 27.06087


 20%|██        | 408/2000 [03:13<09:25,  2.81it/s]

Training_loss 27.01549


 20%|██        | 409/2000 [03:13<10:49,  2.45it/s]

Training_loss 26.97618


 20%|██        | 410/2000 [03:14<11:34,  2.29it/s]

Training_loss 26.93949


 21%|██        | 411/2000 [03:14<12:04,  2.19it/s]

Training_loss 26.89958


 21%|██        | 412/2000 [03:15<12:51,  2.06it/s]

Training_loss 26.85748


 21%|██        | 413/2000 [03:16<13:11,  2.01it/s]

Training_loss 26.76779


 21%|██        | 414/2000 [03:16<13:14,  2.00it/s]

Training_loss 26.72075


 21%|██        | 415/2000 [03:17<13:12,  2.00it/s]

Training_loss 26.67065


 21%|██        | 416/2000 [03:17<13:14,  1.99it/s]

Training_loss 26.60884


 21%|██        | 417/2000 [03:18<13:33,  1.95it/s]

Training_loss 26.54548


 21%|██        | 418/2000 [03:18<13:31,  1.95it/s]

Training_loss 26.47895


 21%|██        | 419/2000 [03:19<13:17,  1.98it/s]

Training_loss 26.44916


 21%|██        | 420/2000 [03:19<13:41,  1.92it/s]

Training_loss 26.41680


 21%|██        | 421/2000 [03:20<13:48,  1.91it/s]

Training_loss 26.35615


 21%|██        | 422/2000 [03:20<14:01,  1.88it/s]

Training_loss 26.28733


 21%|██        | 423/2000 [03:21<13:51,  1.90it/s]

Training_loss 26.23977


 21%|██        | 424/2000 [03:21<13:37,  1.93it/s]

Training_loss 26.13710


 21%|██▏       | 425/2000 [03:22<13:40,  1.92it/s]

Training_loss 26.07854


 21%|██▏       | 426/2000 [03:22<13:48,  1.90it/s]

Training_loss 26.02117


 21%|██▏       | 427/2000 [03:23<13:47,  1.90it/s]

Training_loss 25.97209


 21%|██▏       | 428/2000 [03:23<13:40,  1.92it/s]

Training_loss 25.87448


 21%|██▏       | 429/2000 [03:24<13:38,  1.92it/s]

Training_loss 25.82103


 22%|██▏       | 430/2000 [03:24<13:17,  1.97it/s]

Training_loss 25.78428


 22%|██▏       | 431/2000 [03:25<13:16,  1.97it/s]

Training_loss 25.71634


 22%|██▏       | 432/2000 [03:25<13:21,  1.96it/s]

Training_loss 25.64540


 22%|██▏       | 433/2000 [03:26<12:59,  2.01it/s]

Training_loss 25.59519


 22%|██▏       | 434/2000 [03:26<13:08,  1.99it/s]

Training_loss 25.50867


 22%|██▏       | 435/2000 [03:27<13:52,  1.88it/s]

Training_loss 25.45844


 22%|██▏       | 436/2000 [03:27<13:51,  1.88it/s]

Training_loss 25.44714


 22%|██▏       | 437/2000 [03:28<13:20,  1.95it/s]

Training_loss 25.40808


 22%|██▏       | 438/2000 [03:28<11:23,  2.29it/s]

Training_loss 25.37378


 22%|██▏       | 439/2000 [03:29<10:36,  2.45it/s]

Training_loss 25.28851


 22%|██▏       | 440/2000 [03:29<09:39,  2.69it/s]

Training_loss 25.19486


 22%|██▏       | 441/2000 [03:29<09:24,  2.76it/s]

Training_loss 25.12904


 22%|██▏       | 442/2000 [03:30<10:24,  2.49it/s]

Training_loss 25.05485


 22%|██▏       | 443/2000 [03:30<11:13,  2.31it/s]

Training_loss 24.99173


 22%|██▏       | 444/2000 [03:31<11:51,  2.19it/s]

Training_loss 24.93266


 22%|██▏       | 445/2000 [03:31<12:37,  2.05it/s]

Training_loss 24.90917


 22%|██▏       | 446/2000 [03:32<12:59,  1.99it/s]

Training_loss 24.85306


 22%|██▏       | 447/2000 [03:32<13:07,  1.97it/s]

Training_loss 24.81701


 22%|██▏       | 448/2000 [03:33<12:56,  2.00it/s]

Training_loss 24.75454


 22%|██▏       | 449/2000 [03:33<13:07,  1.97it/s]

Training_loss 24.68881


 22%|██▎       | 450/2000 [03:34<13:27,  1.92it/s]

Training_loss 24.63440


 23%|██▎       | 451/2000 [03:34<13:34,  1.90it/s]

Training_loss 24.60327


 23%|██▎       | 452/2000 [03:35<13:20,  1.93it/s]

Training_loss 24.52368


 23%|██▎       | 453/2000 [03:35<13:27,  1.92it/s]

Training_loss 24.42903


 23%|██▎       | 454/2000 [03:36<13:19,  1.93it/s]

Training_loss 24.37009


 23%|██▎       | 455/2000 [03:36<13:11,  1.95it/s]

Training_loss 24.32515


 23%|██▎       | 456/2000 [03:37<13:17,  1.94it/s]

Training_loss 24.26940


 23%|██▎       | 457/2000 [03:37<13:19,  1.93it/s]

Training_loss 24.21519


 23%|██▎       | 458/2000 [03:38<13:31,  1.90it/s]

Training_loss 24.17347


 23%|██▎       | 459/2000 [03:39<13:44,  1.87it/s]

Training_loss 24.10559


 23%|██▎       | 460/2000 [03:39<14:02,  1.83it/s]

Training_loss 24.03314


 23%|██▎       | 461/2000 [03:40<13:19,  1.92it/s]

Training_loss 23.97167


 23%|██▎       | 462/2000 [03:40<13:28,  1.90it/s]

Training_loss 23.93133


 23%|██▎       | 463/2000 [03:41<13:28,  1.90it/s]

Training_loss 23.87786


 23%|██▎       | 464/2000 [03:41<14:02,  1.82it/s]

Training_loss 23.82057


 23%|██▎       | 465/2000 [03:42<13:07,  1.95it/s]

Training_loss 23.78510


 23%|██▎       | 466/2000 [03:42<13:19,  1.92it/s]

Training_loss 23.68773


 23%|██▎       | 467/2000 [03:43<13:28,  1.90it/s]

Training_loss 23.61722


 23%|██▎       | 468/2000 [03:43<13:11,  1.94it/s]

Training_loss 23.57457


 23%|██▎       | 469/2000 [03:44<13:44,  1.86it/s]

Training_loss 23.52186


 24%|██▎       | 470/2000 [03:44<12:11,  2.09it/s]

Training_loss 23.43032


 24%|██▎       | 471/2000 [03:45<11:13,  2.27it/s]

Training_loss 23.35318


 24%|██▎       | 472/2000 [03:45<10:00,  2.54it/s]

Training_loss 23.31344


 24%|██▎       | 473/2000 [03:45<09:26,  2.69it/s]

Training_loss 23.25513


 24%|██▎       | 474/2000 [03:46<09:40,  2.63it/s]

Training_loss 23.21639


 24%|██▍       | 475/2000 [03:46<10:55,  2.33it/s]

Training_loss 23.15482


 24%|██▍       | 476/2000 [03:47<11:58,  2.12it/s]

Training_loss 23.08471


 24%|██▍       | 477/2000 [03:47<12:32,  2.02it/s]

Training_loss 23.02980


 24%|██▍       | 478/2000 [03:48<12:40,  2.00it/s]

Training_loss 22.97412


 24%|██▍       | 479/2000 [03:48<12:52,  1.97it/s]

Training_loss 22.93884


 24%|██▍       | 480/2000 [03:49<12:47,  1.98it/s]

Training_loss 22.86409


 24%|██▍       | 481/2000 [03:49<12:48,  1.98it/s]

Training_loss 22.79610


 24%|██▍       | 482/2000 [03:50<12:58,  1.95it/s]

Training_loss 22.73994


 24%|██▍       | 483/2000 [03:50<13:11,  1.92it/s]

Training_loss 22.67852


 24%|██▍       | 484/2000 [03:51<13:02,  1.94it/s]

Training_loss 22.59038


 24%|██▍       | 485/2000 [03:51<13:08,  1.92it/s]

Training_loss 22.49443


 24%|██▍       | 486/2000 [03:52<12:49,  1.97it/s]

Training_loss 22.41342


 24%|██▍       | 487/2000 [03:52<12:48,  1.97it/s]

Training_loss 22.35133


 24%|██▍       | 488/2000 [03:53<12:44,  1.98it/s]

Training_loss 22.27997


 24%|██▍       | 489/2000 [03:53<12:56,  1.95it/s]

Training_loss 22.17096


 24%|██▍       | 490/2000 [03:54<13:00,  1.93it/s]

Training_loss 22.09403


 25%|██▍       | 491/2000 [03:54<13:02,  1.93it/s]

Training_loss 22.00716


 25%|██▍       | 492/2000 [03:55<12:40,  1.98it/s]

Training_loss 21.96862


 25%|██▍       | 493/2000 [03:55<12:29,  2.01it/s]

Training_loss 21.87932


 25%|██▍       | 494/2000 [03:56<12:55,  1.94it/s]

Training_loss 21.80058


 25%|██▍       | 495/2000 [03:56<13:02,  1.92it/s]

Training_loss 21.75303


 25%|██▍       | 496/2000 [03:57<13:27,  1.86it/s]

Training_loss 21.67913


 25%|██▍       | 497/2000 [03:58<13:34,  1.85it/s]

Training_loss 21.61484


 25%|██▍       | 498/2000 [03:58<13:05,  1.91it/s]

Training_loss 21.54253


 25%|██▍       | 499/2000 [03:59<13:03,  1.92it/s]

Training_loss 21.48507


 25%|██▌       | 500/2000 [03:59<13:14,  1.89it/s]

Training_loss 21.44693


 25%|██▌       | 501/2000 [04:00<13:15,  1.88it/s]

Training_loss 21.37004


 25%|██▌       | 502/2000 [04:00<13:29,  1.85it/s]

Training_loss 21.29358


 25%|██▌       | 503/2000 [04:01<12:58,  1.92it/s]

Training_loss 21.21059


 25%|██▌       | 504/2000 [04:01<11:49,  2.11it/s]

Training_loss 21.17257


 25%|██▌       | 505/2000 [04:01<11:01,  2.26it/s]

Training_loss 21.10153


 25%|██▌       | 506/2000 [04:02<10:01,  2.49it/s]

Training_loss 21.04378


 25%|██▌       | 507/2000 [04:02<09:46,  2.54it/s]

Training_loss 20.98423


 25%|██▌       | 508/2000 [04:03<11:10,  2.23it/s]

Training_loss 20.93542


 25%|██▌       | 509/2000 [04:03<12:24,  2.00it/s]

Training_loss 20.87758


 26%|██▌       | 510/2000 [04:04<12:57,  1.92it/s]

Training_loss 20.80848


 26%|██▌       | 511/2000 [04:04<12:58,  1.91it/s]

Training_loss 20.74015


 26%|██▌       | 512/2000 [04:05<13:29,  1.84it/s]

Training_loss 20.66323


 26%|██▌       | 513/2000 [04:06<13:28,  1.84it/s]

Training_loss 20.59904


 26%|██▌       | 514/2000 [04:06<13:26,  1.84it/s]

Training_loss 20.52532


 26%|██▌       | 515/2000 [04:07<13:24,  1.85it/s]

Training_loss 20.45986


 26%|██▌       | 516/2000 [04:07<13:13,  1.87it/s]

Training_loss 20.38581


 26%|██▌       | 517/2000 [04:08<12:21,  2.00it/s]

Training_loss 20.33435


 26%|██▌       | 518/2000 [04:08<12:28,  1.98it/s]

Training_loss 20.29102


 26%|██▌       | 519/2000 [04:08<11:15,  2.19it/s]

Training_loss 20.24807


 26%|██▌       | 520/2000 [04:09<11:40,  2.11it/s]

Training_loss 20.21944


 26%|██▌       | 521/2000 [04:09<12:04,  2.04it/s]

Training_loss 20.18166


 26%|██▌       | 522/2000 [04:10<12:05,  2.04it/s]

Training_loss 20.12391


 26%|██▌       | 523/2000 [04:10<12:12,  2.02it/s]

Training_loss 20.05324


 26%|██▌       | 524/2000 [04:11<12:23,  1.98it/s]

Training_loss 19.99516


 26%|██▋       | 525/2000 [04:12<12:35,  1.95it/s]

Training_loss 19.90312


 26%|██▋       | 526/2000 [04:12<12:39,  1.94it/s]

Training_loss 19.87024


 26%|██▋       | 527/2000 [04:13<12:41,  1.93it/s]

Training_loss 19.83459


 26%|██▋       | 528/2000 [04:13<12:50,  1.91it/s]

Training_loss 19.77501


 26%|██▋       | 529/2000 [04:14<13:20,  1.84it/s]

Training_loss 19.74162


 26%|██▋       | 530/2000 [04:14<13:27,  1.82it/s]

Training_loss 19.67891


 27%|██▋       | 531/2000 [04:15<13:18,  1.84it/s]

Training_loss 19.61974


 27%|██▋       | 532/2000 [04:15<13:10,  1.86it/s]

Training_loss 19.57971


 27%|██▋       | 533/2000 [04:16<13:03,  1.87it/s]

Training_loss 19.51507


 27%|██▋       | 534/2000 [04:16<11:32,  2.12it/s]

Training_loss 19.43894


 27%|██▋       | 535/2000 [04:16<09:49,  2.49it/s]

Training_loss 19.40724


 27%|██▋       | 536/2000 [04:17<08:46,  2.78it/s]

Training_loss 19.32785


 27%|██▋       | 537/2000 [04:17<08:21,  2.92it/s]

Training_loss 19.24597


 27%|██▋       | 538/2000 [04:17<08:52,  2.75it/s]

Training_loss 19.18581


 27%|██▋       | 539/2000 [04:18<10:14,  2.38it/s]

Training_loss 19.13333


 27%|██▋       | 540/2000 [04:18<10:50,  2.24it/s]

Training_loss 19.08088


 27%|██▋       | 541/2000 [04:19<11:20,  2.15it/s]

Training_loss 19.04711


 27%|██▋       | 542/2000 [04:20<11:58,  2.03it/s]

Training_loss 18.99205


 27%|██▋       | 543/2000 [04:20<12:25,  1.95it/s]

Training_loss 18.95639


 27%|██▋       | 544/2000 [04:21<13:00,  1.87it/s]

Training_loss 18.90932


 27%|██▋       | 545/2000 [04:21<13:26,  1.80it/s]

Training_loss 18.84679


 27%|██▋       | 546/2000 [04:22<13:06,  1.85it/s]

Training_loss 18.79499


 27%|██▋       | 547/2000 [04:22<13:12,  1.83it/s]

Training_loss 18.75008


 27%|██▋       | 548/2000 [04:23<13:14,  1.83it/s]

Training_loss 18.70849


 27%|██▋       | 549/2000 [04:23<13:20,  1.81it/s]

Training_loss 18.66480


 28%|██▊       | 550/2000 [04:24<13:34,  1.78it/s]

Training_loss 18.61038


 28%|██▊       | 551/2000 [04:25<13:40,  1.77it/s]

Training_loss 18.54456


 28%|██▊       | 552/2000 [04:25<14:49,  1.63it/s]

Training_loss 18.48683


 28%|██▊       | 553/2000 [04:26<14:38,  1.65it/s]

Training_loss 18.43752


 28%|██▊       | 554/2000 [04:26<14:10,  1.70it/s]

Training_loss 18.38109


 28%|██▊       | 555/2000 [04:27<13:18,  1.81it/s]

Training_loss 18.33394


 28%|██▊       | 556/2000 [04:27<13:04,  1.84it/s]

Training_loss 18.28000


 28%|██▊       | 557/2000 [04:28<12:32,  1.92it/s]

Training_loss 18.23866


 28%|██▊       | 558/2000 [04:28<11:44,  2.05it/s]

Training_loss 18.17826


 28%|██▊       | 559/2000 [04:29<11:49,  2.03it/s]

Training_loss 18.12216


 28%|██▊       | 560/2000 [04:29<12:00,  2.00it/s]

Training_loss 18.07761


 28%|██▊       | 561/2000 [04:30<12:19,  1.95it/s]

Training_loss 18.00781


 28%|██▊       | 562/2000 [04:30<12:45,  1.88it/s]

Training_loss 17.98080


 28%|██▊       | 563/2000 [04:31<12:51,  1.86it/s]

Training_loss 17.93552


 28%|██▊       | 564/2000 [04:32<12:42,  1.88it/s]

Training_loss 17.87970


 28%|██▊       | 565/2000 [04:32<12:41,  1.88it/s]

Training_loss 17.84240


 28%|██▊       | 566/2000 [04:33<12:23,  1.93it/s]

Training_loss 17.80212


 28%|██▊       | 567/2000 [04:33<10:42,  2.23it/s]

Training_loss 17.76260


 28%|██▊       | 568/2000 [04:33<10:41,  2.23it/s]

Training_loss 17.74971


 28%|██▊       | 569/2000 [04:34<11:05,  2.15it/s]

Training_loss 17.70202


 28%|██▊       | 570/2000 [04:34<11:17,  2.11it/s]

Training_loss 17.65629


 29%|██▊       | 571/2000 [04:35<10:20,  2.30it/s]

Training_loss 17.61465


 29%|██▊       | 572/2000 [04:35<10:17,  2.31it/s]

Training_loss 17.58791


 29%|██▊       | 573/2000 [04:36<11:11,  2.12it/s]

Training_loss 17.53889


 29%|██▊       | 574/2000 [04:36<10:35,  2.24it/s]

Training_loss 17.48714


 29%|██▉       | 575/2000 [04:37<11:18,  2.10it/s]

Training_loss 17.46936


 29%|██▉       | 576/2000 [04:37<11:39,  2.04it/s]

Training_loss 17.43966


 29%|██▉       | 577/2000 [04:38<11:58,  1.98it/s]

Training_loss 17.40129


 29%|██▉       | 578/2000 [04:38<12:05,  1.96it/s]

Training_loss 17.35345


 29%|██▉       | 579/2000 [04:39<12:00,  1.97it/s]

Training_loss 17.30238


 29%|██▉       | 580/2000 [04:39<12:31,  1.89it/s]

Training_loss 17.27259


 29%|██▉       | 581/2000 [04:40<12:03,  1.96it/s]

Training_loss 17.24101


 29%|██▉       | 582/2000 [04:40<12:00,  1.97it/s]

Training_loss 17.19949


 29%|██▉       | 583/2000 [04:41<12:23,  1.91it/s]

Training_loss 17.14747


 29%|██▉       | 584/2000 [04:41<12:29,  1.89it/s]

Training_loss 17.10429


 29%|██▉       | 585/2000 [04:42<12:15,  1.92it/s]

Training_loss 17.07054


 29%|██▉       | 586/2000 [04:42<12:21,  1.91it/s]

Training_loss 17.01040


 29%|██▉       | 587/2000 [04:43<12:32,  1.88it/s]

Training_loss 16.93460


 29%|██▉       | 588/2000 [04:43<12:33,  1.87it/s]

Training_loss 16.88138


 29%|██▉       | 589/2000 [04:44<12:50,  1.83it/s]

Training_loss 16.86060


 30%|██▉       | 590/2000 [04:45<12:40,  1.85it/s]

Training_loss 16.83618


 30%|██▉       | 591/2000 [04:45<12:28,  1.88it/s]

Training_loss 16.78254


 30%|██▉       | 592/2000 [04:46<12:22,  1.90it/s]

Training_loss 16.74060


 30%|██▉       | 593/2000 [04:46<12:27,  1.88it/s]

Training_loss 16.69660


 30%|██▉       | 594/2000 [04:47<12:14,  1.91it/s]

Training_loss 16.66661


 30%|██▉       | 595/2000 [04:47<12:27,  1.88it/s]

Training_loss 16.61019


 30%|██▉       | 596/2000 [04:48<12:38,  1.85it/s]

Training_loss 16.56824


 30%|██▉       | 597/2000 [04:48<12:35,  1.86it/s]

Training_loss 16.52872


 30%|██▉       | 598/2000 [04:48<10:28,  2.23it/s]

Training_loss 16.48849


 30%|██▉       | 599/2000 [04:49<09:52,  2.36it/s]

Training_loss 16.44191


 30%|███       | 600/2000 [04:49<09:11,  2.54it/s]

Training_loss 16.40436


 30%|███       | 601/2000 [04:50<08:51,  2.63it/s]

Training_loss 16.38210


 30%|███       | 602/2000 [04:50<09:25,  2.47it/s]

Training_loss 16.33716


 30%|███       | 603/2000 [04:51<10:13,  2.28it/s]

Training_loss 16.29444


 30%|███       | 604/2000 [04:51<10:43,  2.17it/s]

Training_loss 16.24783


 30%|███       | 605/2000 [04:52<11:24,  2.04it/s]

Training_loss 16.22260


 30%|███       | 606/2000 [04:52<11:28,  2.02it/s]

Training_loss 16.18910


 30%|███       | 607/2000 [04:53<11:39,  1.99it/s]

Training_loss 16.15186


 30%|███       | 608/2000 [04:53<11:20,  2.05it/s]

Training_loss 16.12261


 30%|███       | 609/2000 [04:54<11:18,  2.05it/s]

Training_loss 16.09285


 30%|███       | 610/2000 [04:54<11:41,  1.98it/s]

Training_loss 16.06173


 31%|███       | 611/2000 [04:55<11:43,  1.98it/s]

Training_loss 16.01530


 31%|███       | 612/2000 [04:55<11:52,  1.95it/s]

Training_loss 15.97728


 31%|███       | 613/2000 [04:56<11:56,  1.94it/s]

Training_loss 15.93900


 31%|███       | 614/2000 [04:56<12:19,  1.87it/s]

Training_loss 15.89226


 31%|███       | 615/2000 [04:57<11:54,  1.94it/s]

Training_loss 15.85983


 31%|███       | 616/2000 [04:57<11:53,  1.94it/s]

Training_loss 15.81831


 31%|███       | 617/2000 [04:58<11:40,  1.97it/s]

Training_loss 15.78932


 31%|███       | 618/2000 [04:58<11:58,  1.92it/s]

Training_loss 15.74795


 31%|███       | 619/2000 [04:59<12:06,  1.90it/s]

Training_loss 15.69218


 31%|███       | 620/2000 [04:59<12:03,  1.91it/s]

Training_loss 15.66721


 31%|███       | 621/2000 [05:00<12:14,  1.88it/s]

Training_loss 15.63410


 31%|███       | 622/2000 [05:00<12:02,  1.91it/s]

Training_loss 15.61052


 31%|███       | 623/2000 [05:01<12:12,  1.88it/s]

Training_loss 15.56916


 31%|███       | 624/2000 [05:01<12:06,  1.89it/s]

Training_loss 15.53422


 31%|███▏      | 625/2000 [05:02<10:16,  2.23it/s]

Training_loss 15.49600


 31%|███▏      | 626/2000 [05:02<10:16,  2.23it/s]

Training_loss 15.45459


 31%|███▏      | 627/2000 [05:03<10:54,  2.10it/s]

Training_loss 15.42085


 31%|███▏      | 628/2000 [05:03<11:24,  2.00it/s]

Training_loss 15.38132


 31%|███▏      | 629/2000 [05:04<11:31,  1.98it/s]

Training_loss 15.36201


 32%|███▏      | 630/2000 [05:04<10:48,  2.11it/s]

Training_loss 15.33339


 32%|███▏      | 631/2000 [05:04<09:20,  2.44it/s]

Training_loss 15.29445


 32%|███▏      | 632/2000 [05:05<09:00,  2.53it/s]

Training_loss 15.26623


 32%|███▏      | 633/2000 [05:05<08:38,  2.64it/s]

Training_loss 15.24031


 32%|███▏      | 634/2000 [05:06<09:44,  2.34it/s]

Training_loss 15.20988


 32%|███▏      | 635/2000 [05:06<09:50,  2.31it/s]

Training_loss 15.17883


 32%|███▏      | 636/2000 [05:07<10:15,  2.22it/s]

Training_loss 15.11909


 32%|███▏      | 637/2000 [05:07<10:45,  2.11it/s]

Training_loss 15.09439


 32%|███▏      | 638/2000 [05:08<11:11,  2.03it/s]

Training_loss 15.06908


 32%|███▏      | 639/2000 [05:08<11:07,  2.04it/s]

Training_loss 15.03915


 32%|███▏      | 640/2000 [05:09<11:25,  1.98it/s]

Training_loss 15.00165


 32%|███▏      | 641/2000 [05:09<11:57,  1.90it/s]

Training_loss 14.96107


 32%|███▏      | 642/2000 [05:10<11:29,  1.97it/s]

Training_loss 14.92821


 32%|███▏      | 643/2000 [05:10<11:48,  1.91it/s]

Training_loss 14.91004


 32%|███▏      | 644/2000 [05:11<11:51,  1.91it/s]

Training_loss 14.87047


 32%|███▏      | 645/2000 [05:11<11:32,  1.96it/s]

Training_loss 14.82873


 32%|███▏      | 646/2000 [05:12<11:30,  1.96it/s]

Training_loss 14.79125


 32%|███▏      | 647/2000 [05:12<11:31,  1.96it/s]

Training_loss 14.76159


 32%|███▏      | 648/2000 [05:13<11:44,  1.92it/s]

Training_loss 14.73639


 32%|███▏      | 649/2000 [05:13<11:47,  1.91it/s]

Training_loss 14.70569


 32%|███▎      | 650/2000 [05:14<11:41,  1.93it/s]

Training_loss 14.67324


 33%|███▎      | 651/2000 [05:14<11:42,  1.92it/s]

Training_loss 14.62691


 33%|███▎      | 652/2000 [05:15<11:50,  1.90it/s]

Training_loss 14.59187


 33%|███▎      | 653/2000 [05:15<11:41,  1.92it/s]

Training_loss 14.56061


 33%|███▎      | 654/2000 [05:16<11:48,  1.90it/s]

Training_loss 14.54254


 33%|███▎      | 655/2000 [05:17<11:34,  1.94it/s]

Training_loss 14.50384


 33%|███▎      | 656/2000 [05:17<11:49,  1.89it/s]

Training_loss 14.47993


 33%|███▎      | 657/2000 [05:18<11:50,  1.89it/s]

Training_loss 14.43865


 33%|███▎      | 658/2000 [05:18<11:25,  1.96it/s]

Training_loss 14.40373


 33%|███▎      | 659/2000 [05:19<11:20,  1.97it/s]

Training_loss 14.38285


 33%|███▎      | 660/2000 [05:19<10:10,  2.19it/s]

Training_loss 14.34686


 33%|███▎      | 661/2000 [05:19<10:33,  2.11it/s]

Training_loss 14.30882


 33%|███▎      | 662/2000 [05:20<10:43,  2.08it/s]

Training_loss 14.28220


 33%|███▎      | 663/2000 [05:20<09:39,  2.31it/s]

Training_loss 14.24110


 33%|███▎      | 664/2000 [05:21<09:14,  2.41it/s]

Training_loss 14.21883


 33%|███▎      | 665/2000 [05:21<08:28,  2.63it/s]

Training_loss 14.17921


 33%|███▎      | 666/2000 [05:21<08:49,  2.52it/s]

Training_loss 14.15428


 33%|███▎      | 667/2000 [05:22<09:27,  2.35it/s]

Training_loss 14.12006


 33%|███▎      | 668/2000 [05:22<09:54,  2.24it/s]

Training_loss 14.08351


 33%|███▎      | 669/2000 [05:23<10:40,  2.08it/s]

Training_loss 14.05609


 34%|███▎      | 670/2000 [05:23<11:07,  1.99it/s]

Training_loss 14.02416


 34%|███▎      | 671/2000 [05:24<11:03,  2.00it/s]

Training_loss 14.00046


 34%|███▎      | 672/2000 [05:24<10:57,  2.02it/s]

Training_loss 13.97462


 34%|███▎      | 673/2000 [05:25<11:10,  1.98it/s]

Training_loss 13.94592


 34%|███▎      | 674/2000 [05:26<11:31,  1.92it/s]

Training_loss 13.90540


 34%|███▍      | 675/2000 [05:26<11:24,  1.94it/s]

Training_loss 13.87781


 34%|███▍      | 676/2000 [05:27<11:28,  1.92it/s]

Training_loss 13.83796


 34%|███▍      | 677/2000 [05:27<11:47,  1.87it/s]

Training_loss 13.80085


 34%|███▍      | 678/2000 [05:28<11:57,  1.84it/s]

Training_loss 13.76572


 34%|███▍      | 679/2000 [05:28<10:42,  2.06it/s]

Training_loss 13.74143


 34%|███▍      | 680/2000 [05:28<09:29,  2.32it/s]

Training_loss 13.71825


 34%|███▍      | 681/2000 [05:29<08:49,  2.49it/s]

Training_loss 13.69851


 34%|███▍      | 682/2000 [05:29<08:20,  2.63it/s]

Training_loss 13.67311


 34%|███▍      | 683/2000 [05:29<07:49,  2.80it/s]

Training_loss 13.64947


 34%|███▍      | 684/2000 [05:30<08:24,  2.61it/s]

Training_loss 13.62699


 34%|███▍      | 685/2000 [05:30<09:04,  2.41it/s]

Training_loss 13.58120


 34%|███▍      | 686/2000 [05:31<09:54,  2.21it/s]

Training_loss 13.54541


 34%|███▍      | 687/2000 [05:31<10:23,  2.11it/s]

Training_loss 13.52244


 34%|███▍      | 688/2000 [05:32<11:02,  1.98it/s]

Training_loss 13.50691


 34%|███▍      | 689/2000 [05:32<11:12,  1.95it/s]

Training_loss 13.47824


 34%|███▍      | 690/2000 [05:33<11:42,  1.86it/s]

Training_loss 13.45636


 35%|███▍      | 691/2000 [05:34<11:39,  1.87it/s]

Training_loss 13.43865


 35%|███▍      | 692/2000 [05:34<11:36,  1.88it/s]

Training_loss 13.40628


 35%|███▍      | 693/2000 [05:35<11:27,  1.90it/s]

Training_loss 13.36719


 35%|███▍      | 694/2000 [05:35<11:39,  1.87it/s]

Training_loss 13.33956


 35%|███▍      | 695/2000 [05:36<11:50,  1.84it/s]

Training_loss 13.31283


 35%|███▍      | 696/2000 [05:36<11:19,  1.92it/s]

Training_loss 13.29349


 35%|███▍      | 697/2000 [05:36<09:57,  2.18it/s]

Training_loss 13.27168


 35%|███▍      | 698/2000 [05:37<09:31,  2.28it/s]

Training_loss 13.23311


 35%|███▍      | 699/2000 [05:37<08:23,  2.58it/s]

Training_loss 13.21254


 35%|███▌      | 700/2000 [05:38<08:22,  2.59it/s]

Training_loss 13.19919


 35%|███▌      | 701/2000 [05:38<09:14,  2.34it/s]

Training_loss 13.17485


 35%|███▌      | 702/2000 [05:39<09:39,  2.24it/s]

Training_loss 13.14659


 35%|███▌      | 703/2000 [05:39<10:20,  2.09it/s]

Training_loss 13.12010


 35%|███▌      | 704/2000 [05:39<09:54,  2.18it/s]

Training_loss 13.10169


 35%|███▌      | 705/2000 [05:40<10:15,  2.10it/s]

Training_loss 13.08282


 35%|███▌      | 706/2000 [05:40<09:45,  2.21it/s]

Training_loss 13.05414


 35%|███▌      | 707/2000 [05:41<10:08,  2.13it/s]

Training_loss 13.02157


 35%|███▌      | 708/2000 [05:41<10:26,  2.06it/s]

Training_loss 12.99494


 35%|███▌      | 709/2000 [05:42<10:49,  1.99it/s]

Training_loss 12.96379


 36%|███▌      | 710/2000 [05:42<10:45,  2.00it/s]

Training_loss 12.93559


 36%|███▌      | 711/2000 [05:43<11:31,  1.86it/s]

Training_loss 12.89878


 36%|███▌      | 712/2000 [05:44<11:18,  1.90it/s]

Training_loss 12.88292


 36%|███▌      | 713/2000 [05:44<11:01,  1.95it/s]

Training_loss 12.84368


 36%|███▌      | 714/2000 [05:45<10:44,  2.00it/s]

Training_loss 12.81598


 36%|███▌      | 715/2000 [05:45<11:05,  1.93it/s]

Training_loss 12.77737


 36%|███▌      | 716/2000 [05:46<11:33,  1.85it/s]

Training_loss 12.74299


 36%|███▌      | 717/2000 [05:46<11:44,  1.82it/s]

Training_loss 12.72309


 36%|███▌      | 718/2000 [05:47<11:49,  1.81it/s]

Training_loss 12.69818


 36%|███▌      | 719/2000 [05:47<12:01,  1.78it/s]

Training_loss 12.66405


 36%|███▌      | 720/2000 [05:48<12:08,  1.76it/s]

Training_loss 12.64871


 36%|███▌      | 721/2000 [05:48<11:28,  1.86it/s]

Training_loss 12.63753


 36%|███▌      | 722/2000 [05:49<11:14,  1.90it/s]

Training_loss 12.60516


 36%|███▌      | 723/2000 [05:49<11:12,  1.90it/s]

Training_loss 12.58288


 36%|███▌      | 724/2000 [05:50<11:04,  1.92it/s]

Training_loss 12.55440


 36%|███▋      | 725/2000 [05:51<11:05,  1.91it/s]

Training_loss 12.53476


 36%|███▋      | 726/2000 [05:51<11:01,  1.92it/s]

Training_loss 12.50954


 36%|███▋      | 727/2000 [05:52<11:04,  1.92it/s]

Training_loss 12.49248


 36%|███▋      | 728/2000 [05:52<10:52,  1.95it/s]

Training_loss 12.47321


 36%|███▋      | 729/2000 [05:52<09:20,  2.27it/s]

Training_loss 12.44601


 36%|███▋      | 730/2000 [05:53<08:36,  2.46it/s]

Training_loss 12.43622


 37%|███▋      | 731/2000 [05:53<09:16,  2.28it/s]

Training_loss 12.41858


 37%|███▋      | 732/2000 [05:54<08:59,  2.35it/s]

Training_loss 12.39093


 37%|███▋      | 733/2000 [05:54<08:33,  2.47it/s]

Training_loss 12.38132


 37%|███▋      | 734/2000 [05:54<09:13,  2.29it/s]

Training_loss 12.35788


 37%|███▋      | 735/2000 [05:55<09:50,  2.14it/s]

Training_loss 12.33863


 37%|███▋      | 736/2000 [05:56<10:28,  2.01it/s]

Training_loss 12.31739


 37%|███▋      | 737/2000 [05:56<10:50,  1.94it/s]

Training_loss 12.30426


 37%|███▋      | 738/2000 [05:57<10:48,  1.95it/s]

Training_loss 12.27716


 37%|███▋      | 739/2000 [05:57<10:42,  1.96it/s]

Training_loss 12.26809


 37%|███▋      | 740/2000 [05:58<10:33,  1.99it/s]

Training_loss 12.25126


 37%|███▋      | 741/2000 [05:58<10:33,  1.99it/s]

Training_loss 12.22896


 37%|███▋      | 742/2000 [05:59<10:42,  1.96it/s]

Training_loss 12.20872


 37%|███▋      | 743/2000 [05:59<10:35,  1.98it/s]

Training_loss 12.17994


 37%|███▋      | 744/2000 [06:00<10:37,  1.97it/s]

Training_loss 12.16546


 37%|███▋      | 745/2000 [06:00<10:22,  2.02it/s]

Training_loss 12.15081


 37%|███▋      | 746/2000 [06:01<10:31,  1.99it/s]

Training_loss 12.13711


 37%|███▋      | 747/2000 [06:01<10:38,  1.96it/s]

Training_loss 12.11897


 37%|███▋      | 748/2000 [06:02<10:51,  1.92it/s]

Training_loss 12.08990


 37%|███▋      | 749/2000 [06:02<10:54,  1.91it/s]

Training_loss 12.07589


 38%|███▊      | 750/2000 [06:03<10:52,  1.92it/s]

Training_loss 12.06583


 38%|███▊      | 751/2000 [06:03<10:59,  1.90it/s]

Training_loss 12.05149


 38%|███▊      | 752/2000 [06:04<10:53,  1.91it/s]

Training_loss 12.02650


 38%|███▊      | 753/2000 [06:04<10:49,  1.92it/s]

Training_loss 12.00068


 38%|███▊      | 754/2000 [06:05<10:40,  1.95it/s]

Training_loss 11.97854


 38%|███▊      | 755/2000 [06:05<10:43,  1.94it/s]

Training_loss 11.96159


 38%|███▊      | 756/2000 [06:06<10:43,  1.93it/s]

Training_loss 11.94190


 38%|███▊      | 757/2000 [06:06<10:43,  1.93it/s]

Training_loss 11.93102


 38%|███▊      | 758/2000 [06:07<10:31,  1.97it/s]

Training_loss 11.90515


 38%|███▊      | 759/2000 [06:07<10:35,  1.95it/s]

Training_loss 11.89701


 38%|███▊      | 760/2000 [06:08<10:45,  1.92it/s]

Training_loss 11.87661


 38%|███▊      | 761/2000 [06:08<10:06,  2.04it/s]

Training_loss 11.86701


 38%|███▊      | 762/2000 [06:09<09:27,  2.18it/s]

Training_loss 11.84928


 38%|███▊      | 763/2000 [06:09<09:49,  2.10it/s]

Training_loss 11.83741


 38%|███▊      | 764/2000 [06:10<09:08,  2.25it/s]

Training_loss 11.81839


 38%|███▊      | 765/2000 [06:10<08:50,  2.33it/s]

Training_loss 11.80608


 38%|███▊      | 766/2000 [06:10<08:44,  2.35it/s]

Training_loss 11.78970


 38%|███▊      | 767/2000 [06:11<09:24,  2.18it/s]

Training_loss 11.77387


 38%|███▊      | 768/2000 [06:11<09:37,  2.13it/s]

Training_loss 11.76186


 38%|███▊      | 769/2000 [06:12<09:59,  2.05it/s]

Training_loss 11.74490


 38%|███▊      | 770/2000 [06:12<10:05,  2.03it/s]

Training_loss 11.72444


 39%|███▊      | 771/2000 [06:13<10:31,  1.95it/s]

Training_loss 11.71365


 39%|███▊      | 772/2000 [06:14<10:50,  1.89it/s]

Training_loss 11.70318


 39%|███▊      | 773/2000 [06:14<10:52,  1.88it/s]

Training_loss 11.68601


 39%|███▊      | 774/2000 [06:15<11:21,  1.80it/s]

Training_loss 11.65949


 39%|███▉      | 775/2000 [06:15<11:23,  1.79it/s]

Training_loss 11.63806


 39%|███▉      | 776/2000 [06:16<11:30,  1.77it/s]

Training_loss 11.62587


 39%|███▉      | 777/2000 [06:16<11:34,  1.76it/s]

Training_loss 11.60412


 39%|███▉      | 778/2000 [06:17<11:09,  1.83it/s]

Training_loss 11.59973


 39%|███▉      | 779/2000 [06:18<11:02,  1.84it/s]

Training_loss 11.58920


 39%|███▉      | 780/2000 [06:18<10:59,  1.85it/s]

Training_loss 11.57178


 39%|███▉      | 781/2000 [06:19<10:45,  1.89it/s]

Training_loss 11.55026


 39%|███▉      | 782/2000 [06:19<11:00,  1.84it/s]

Training_loss 11.54002


 39%|███▉      | 783/2000 [06:20<11:02,  1.84it/s]

Training_loss 11.52062


 39%|███▉      | 784/2000 [06:20<10:49,  1.87it/s]

Training_loss 11.49706


 39%|███▉      | 785/2000 [06:21<10:35,  1.91it/s]

Training_loss 11.49441


 39%|███▉      | 786/2000 [06:21<10:32,  1.92it/s]

Training_loss 11.47880


 39%|███▉      | 787/2000 [06:22<10:40,  1.90it/s]

Training_loss 11.46991


 39%|███▉      | 788/2000 [06:22<10:25,  1.94it/s]

Training_loss 11.44026


 39%|███▉      | 789/2000 [06:23<10:18,  1.96it/s]

Training_loss 11.42647


 40%|███▉      | 790/2000 [06:23<10:41,  1.89it/s]

Training_loss 11.40403


 40%|███▉      | 791/2000 [06:24<10:30,  1.92it/s]

Training_loss 11.38690


 40%|███▉      | 792/2000 [06:24<09:40,  2.08it/s]

Training_loss 11.37050


 40%|███▉      | 793/2000 [06:24<08:17,  2.43it/s]

Training_loss 11.36450


 40%|███▉      | 794/2000 [06:25<08:09,  2.47it/s]

Training_loss 11.33963


 40%|███▉      | 795/2000 [06:25<07:37,  2.63it/s]

Training_loss 11.33453


 40%|███▉      | 796/2000 [06:26<08:20,  2.41it/s]

Training_loss 11.32195


 40%|███▉      | 797/2000 [06:26<09:04,  2.21it/s]

Training_loss 11.30512


 40%|███▉      | 798/2000 [06:27<09:32,  2.10it/s]

Training_loss 11.28233


 40%|███▉      | 799/2000 [06:27<09:29,  2.11it/s]

Training_loss 11.27411


 40%|████      | 800/2000 [06:28<09:54,  2.02it/s]

Training_loss 11.26144


 40%|████      | 801/2000 [06:28<09:51,  2.03it/s]

Training_loss 11.24935


 40%|████      | 802/2000 [06:29<09:59,  2.00it/s]

Training_loss 11.23524


 40%|████      | 803/2000 [06:29<10:01,  1.99it/s]

Training_loss 11.23286


 40%|████      | 804/2000 [06:30<10:14,  1.95it/s]

Training_loss 11.21281


 40%|████      | 805/2000 [06:30<09:45,  2.04it/s]

Training_loss 11.20341


 40%|████      | 806/2000 [06:31<10:01,  1.99it/s]

Training_loss 11.19487


 40%|████      | 807/2000 [06:31<09:59,  1.99it/s]

Training_loss 11.19206


 40%|████      | 808/2000 [06:32<10:06,  1.96it/s]

Training_loss 11.18022


 40%|████      | 809/2000 [06:32<10:00,  1.98it/s]

Training_loss 11.16376


 40%|████      | 810/2000 [06:33<10:16,  1.93it/s]

Training_loss 11.15425


 41%|████      | 811/2000 [06:33<08:38,  2.30it/s]

Training_loss 11.14592


 41%|████      | 812/2000 [06:34<08:55,  2.22it/s]

Training_loss 11.12026


 41%|████      | 813/2000 [06:34<08:06,  2.44it/s]

Training_loss 11.10753


 41%|████      | 814/2000 [06:34<07:20,  2.69it/s]

Training_loss 11.09651


 41%|████      | 815/2000 [06:35<08:13,  2.40it/s]

Training_loss 11.07718


 41%|████      | 816/2000 [06:35<08:22,  2.36it/s]

Training_loss 11.06948


 41%|████      | 817/2000 [06:35<07:51,  2.51it/s]

Training_loss 11.05894


 41%|████      | 818/2000 [06:36<07:11,  2.74it/s]

Training_loss 11.04540


 41%|████      | 819/2000 [06:36<06:22,  3.09it/s]

Training_loss 11.05391


 41%|████      | 820/2000 [06:36<05:50,  3.36it/s]

Training_loss 11.04365


 41%|████      | 821/2000 [06:36<05:35,  3.51it/s]

Training_loss 11.02476


 41%|████      | 822/2000 [06:37<05:21,  3.66it/s]

Training_loss 11.02421


 41%|████      | 823/2000 [06:37<06:26,  3.04it/s]

Training_loss 11.00672


 41%|████      | 824/2000 [06:37<06:04,  3.23it/s]

Training_loss 10.99087


 41%|████▏     | 825/2000 [06:38<06:34,  2.98it/s]

Training_loss 10.96849


 41%|████▏     | 826/2000 [06:38<06:24,  3.05it/s]

Training_loss 10.95459


 41%|████▏     | 827/2000 [06:38<05:58,  3.27it/s]

Training_loss 10.94302


 41%|████▏     | 828/2000 [06:39<05:40,  3.44it/s]

Training_loss 10.92525


 41%|████▏     | 829/2000 [06:39<05:30,  3.54it/s]

Training_loss 10.91192


 42%|████▏     | 830/2000 [06:39<05:59,  3.26it/s]

Training_loss 10.89351


 42%|████▏     | 831/2000 [06:40<06:11,  3.15it/s]

Training_loss 10.88045


 42%|████▏     | 832/2000 [06:40<06:47,  2.86it/s]

Training_loss 10.86868


 42%|████▏     | 833/2000 [06:41<07:27,  2.61it/s]

Training_loss 10.84972


 42%|████▏     | 834/2000 [06:41<07:54,  2.46it/s]

Training_loss 10.83292


 42%|████▏     | 835/2000 [06:41<08:18,  2.34it/s]

Training_loss 10.82558


 42%|████▏     | 836/2000 [06:42<08:37,  2.25it/s]

Training_loss 10.81562


 42%|████▏     | 837/2000 [06:42<08:14,  2.35it/s]

Training_loss 10.80057


 42%|████▏     | 838/2000 [06:43<09:03,  2.14it/s]

Training_loss 10.78857


 42%|████▏     | 839/2000 [06:43<09:56,  1.95it/s]

Training_loss 10.76558


 42%|████▏     | 840/2000 [06:44<10:39,  1.81it/s]

Training_loss 10.75441


 42%|████▏     | 841/2000 [06:45<10:52,  1.78it/s]

Training_loss 10.75197


 42%|████▏     | 842/2000 [06:45<10:55,  1.77it/s]

Training_loss 10.74210


 42%|████▏     | 843/2000 [06:46<11:27,  1.68it/s]

Training_loss 10.73030


 42%|████▏     | 844/2000 [06:46<11:04,  1.74it/s]

Training_loss 10.71594


 42%|████▏     | 845/2000 [06:47<11:22,  1.69it/s]

Training_loss 10.70144


 42%|████▏     | 846/2000 [06:48<11:11,  1.72it/s]

Training_loss 10.68593


 42%|████▏     | 847/2000 [06:48<11:02,  1.74it/s]

Training_loss 10.66785


 42%|████▏     | 848/2000 [06:49<10:15,  1.87it/s]

Training_loss 10.65274


 42%|████▏     | 849/2000 [06:49<08:46,  2.19it/s]

Training_loss 10.64742


 42%|████▎     | 850/2000 [06:49<07:39,  2.50it/s]

Training_loss 10.63008


 43%|████▎     | 851/2000 [06:49<06:57,  2.76it/s]

Training_loss 10.61508


 43%|████▎     | 852/2000 [06:50<06:21,  3.01it/s]

Training_loss 10.60355


 43%|████▎     | 853/2000 [06:50<06:00,  3.18it/s]

Training_loss 10.60267


 43%|████▎     | 854/2000 [06:50<05:45,  3.32it/s]

Training_loss 10.58692


 43%|████▎     | 855/2000 [06:51<05:38,  3.38it/s]

Training_loss 10.57952


 43%|████▎     | 856/2000 [06:51<05:33,  3.43it/s]

Training_loss 10.57334


 43%|████▎     | 857/2000 [06:51<05:26,  3.50it/s]

Training_loss 10.57233


 43%|████▎     | 858/2000 [06:51<05:25,  3.50it/s]

Training_loss 10.56757


 43%|████▎     | 859/2000 [06:52<05:29,  3.46it/s]

Training_loss 10.54824


 43%|████▎     | 860/2000 [06:52<05:30,  3.45it/s]

Training_loss 10.54114


 43%|████▎     | 861/2000 [06:52<05:21,  3.54it/s]

Training_loss 10.53036


 43%|████▎     | 862/2000 [06:53<05:15,  3.61it/s]

Training_loss 10.52069


 43%|████▎     | 863/2000 [06:53<05:09,  3.67it/s]

Training_loss 10.50856


 43%|████▎     | 864/2000 [06:53<05:12,  3.63it/s]

Training_loss 10.49563


 43%|████▎     | 865/2000 [06:53<05:11,  3.64it/s]

Training_loss 10.48681


 43%|████▎     | 866/2000 [06:54<05:17,  3.57it/s]

Training_loss 10.47463


 43%|████▎     | 867/2000 [06:54<05:19,  3.55it/s]

Training_loss 10.46989


 43%|████▎     | 868/2000 [06:54<05:17,  3.57it/s]

Training_loss 10.46492


 43%|████▎     | 869/2000 [06:55<05:38,  3.34it/s]

Training_loss 10.45478


 44%|████▎     | 870/2000 [06:55<07:13,  2.61it/s]

Training_loss 10.44069


 44%|████▎     | 871/2000 [06:56<07:55,  2.38it/s]

Training_loss 10.43843


 44%|████▎     | 872/2000 [06:56<08:26,  2.23it/s]

Training_loss 10.43136


 44%|████▎     | 873/2000 [06:57<07:53,  2.38it/s]

Training_loss 10.42453


 44%|████▎     | 874/2000 [06:57<08:46,  2.14it/s]

Training_loss 10.41168


 44%|████▍     | 875/2000 [06:57<08:08,  2.30it/s]

Training_loss 10.40707


 44%|████▍     | 876/2000 [06:58<08:35,  2.18it/s]

Training_loss 10.40284


 44%|████▍     | 877/2000 [06:59<09:04,  2.06it/s]

Training_loss 10.39142


 44%|████▍     | 878/2000 [06:59<09:44,  1.92it/s]

Training_loss 10.37894


 44%|████▍     | 879/2000 [07:00<10:05,  1.85it/s]

Training_loss 10.37031


 44%|████▍     | 880/2000 [07:00<10:34,  1.77it/s]

Training_loss 10.36128


 44%|████▍     | 881/2000 [07:01<10:59,  1.70it/s]

Training_loss 10.35656


 44%|████▍     | 882/2000 [07:01<10:30,  1.77it/s]

Training_loss 10.34563


 44%|████▍     | 883/2000 [07:02<10:22,  1.79it/s]

Training_loss 10.33592


 44%|████▍     | 884/2000 [07:03<10:26,  1.78it/s]

Training_loss 10.32518


 44%|████▍     | 885/2000 [07:03<10:24,  1.79it/s]

Training_loss 10.31604


 44%|████▍     | 886/2000 [07:04<10:06,  1.84it/s]

Training_loss 10.30462


 44%|████▍     | 887/2000 [07:04<10:03,  1.84it/s]

Training_loss 10.29801


 44%|████▍     | 888/2000 [07:05<09:58,  1.86it/s]

Training_loss 10.29434


 44%|████▍     | 889/2000 [07:05<10:41,  1.73it/s]

Training_loss 10.28167


 44%|████▍     | 890/2000 [07:06<10:26,  1.77it/s]

Training_loss 10.27238


 45%|████▍     | 891/2000 [07:07<10:38,  1.74it/s]

Training_loss 10.26771


 45%|████▍     | 892/2000 [07:07<10:36,  1.74it/s]

Training_loss 10.26000


 45%|████▍     | 893/2000 [07:08<10:16,  1.80it/s]

Training_loss 10.25705


 45%|████▍     | 894/2000 [07:08<10:31,  1.75it/s]

Training_loss 10.25612


 45%|████▍     | 895/2000 [07:09<10:34,  1.74it/s]

Training_loss 10.25364


 45%|████▍     | 896/2000 [07:09<10:52,  1.69it/s]

Training_loss 10.24748


 45%|████▍     | 897/2000 [07:10<10:56,  1.68it/s]

Training_loss 10.23560


 45%|████▍     | 898/2000 [07:11<10:32,  1.74it/s]

Training_loss 10.22776


 45%|████▍     | 899/2000 [07:11<10:39,  1.72it/s]

Training_loss 10.21930


 45%|████▌     | 900/2000 [07:12<10:30,  1.74it/s]

Training_loss 10.21319


 45%|████▌     | 901/2000 [07:12<10:04,  1.82it/s]

Training_loss 10.20975


 45%|████▌     | 902/2000 [07:12<08:38,  2.12it/s]

Training_loss 10.21268


 45%|████▌     | 903/2000 [07:13<07:50,  2.33it/s]

Training_loss 10.20591


 45%|████▌     | 904/2000 [07:13<07:06,  2.57it/s]

Training_loss 10.20305


 45%|████▌     | 905/2000 [07:14<07:11,  2.54it/s]

Training_loss 10.19598


 45%|████▌     | 906/2000 [07:14<08:17,  2.20it/s]

Training_loss 10.19104


 45%|████▌     | 907/2000 [07:15<08:42,  2.09it/s]

Training_loss 10.18248


 45%|████▌     | 908/2000 [07:15<09:15,  1.96it/s]

Training_loss 10.17107


 45%|████▌     | 909/2000 [07:16<09:31,  1.91it/s]

Training_loss 10.16100


 46%|████▌     | 910/2000 [07:16<09:39,  1.88it/s]

Training_loss 10.14949


 46%|████▌     | 911/2000 [07:17<09:51,  1.84it/s]

Training_loss 10.14277


 46%|████▌     | 912/2000 [07:17<09:59,  1.81it/s]

Training_loss 10.14223


 46%|████▌     | 913/2000 [07:18<10:01,  1.81it/s]

Training_loss 10.13465


 46%|████▌     | 914/2000 [07:19<10:06,  1.79it/s]

Training_loss 10.12941


 46%|████▌     | 915/2000 [07:19<10:05,  1.79it/s]

Training_loss 10.11453


 46%|████▌     | 916/2000 [07:20<09:59,  1.81it/s]

Training_loss 10.11525


 46%|████▌     | 917/2000 [07:20<10:11,  1.77it/s]

Training_loss 10.10222


 46%|████▌     | 918/2000 [07:21<10:26,  1.73it/s]

Training_loss 10.09450


 46%|████▌     | 919/2000 [07:21<10:29,  1.72it/s]

Training_loss 10.08649


 46%|████▌     | 920/2000 [07:22<10:29,  1.72it/s]

Training_loss 10.08807


 46%|████▌     | 921/2000 [07:23<10:54,  1.65it/s]

Training_loss 10.09118


 46%|████▌     | 922/2000 [07:23<11:05,  1.62it/s]

Training_loss 10.07601


 46%|████▌     | 923/2000 [07:24<11:04,  1.62it/s]

Training_loss 10.06660


 46%|████▌     | 924/2000 [07:25<10:51,  1.65it/s]

Training_loss 10.06194


 46%|████▋     | 925/2000 [07:25<10:59,  1.63it/s]

Training_loss 10.06460


 46%|████▋     | 926/2000 [07:26<10:47,  1.66it/s]

Training_loss 10.05773


 46%|████▋     | 927/2000 [07:26<10:41,  1.67it/s]

Training_loss 10.05068


 46%|████▋     | 928/2000 [07:27<10:44,  1.66it/s]

Training_loss 10.04706


 46%|████▋     | 929/2000 [07:28<10:46,  1.66it/s]

Training_loss 10.03929


 46%|████▋     | 930/2000 [07:28<10:22,  1.72it/s]

Training_loss 10.03009


 47%|████▋     | 931/2000 [07:29<09:22,  1.90it/s]

Training_loss 10.02590


 47%|████▋     | 932/2000 [07:29<08:37,  2.06it/s]

Training_loss 10.01618


 47%|████▋     | 933/2000 [07:29<07:58,  2.23it/s]

Training_loss 10.00883


 47%|████▋     | 934/2000 [07:30<08:05,  2.20it/s]

Training_loss 10.01090


 47%|████▋     | 935/2000 [07:30<08:47,  2.02it/s]

Training_loss 10.00405


 47%|████▋     | 936/2000 [07:31<09:11,  1.93it/s]

Training_loss 9.99994


 47%|████▋     | 937/2000 [07:31<09:29,  1.87it/s]

Training_loss 9.99501


 47%|████▋     | 938/2000 [07:32<09:45,  1.81it/s]

Training_loss 9.99113


 47%|████▋     | 939/2000 [07:33<09:58,  1.77it/s]

Training_loss 9.98026


 47%|████▋     | 940/2000 [07:33<10:04,  1.75it/s]

Training_loss 9.97152


 47%|████▋     | 941/2000 [07:34<09:49,  1.80it/s]

Training_loss 9.96885


 47%|████▋     | 942/2000 [07:34<09:51,  1.79it/s]

Training_loss 9.96109


 47%|████▋     | 943/2000 [07:35<09:38,  1.83it/s]

Training_loss 9.95935


 47%|████▋     | 944/2000 [07:35<09:41,  1.82it/s]

Training_loss 9.95061


 47%|████▋     | 945/2000 [07:36<10:02,  1.75it/s]

Training_loss 9.94609


 47%|████▋     | 946/2000 [07:37<09:59,  1.76it/s]

Training_loss 9.94339


 47%|████▋     | 947/2000 [07:37<09:48,  1.79it/s]

Training_loss 9.93914


 47%|████▋     | 948/2000 [07:38<09:52,  1.78it/s]

Training_loss 9.93122


 47%|████▋     | 949/2000 [07:38<09:35,  1.83it/s]

Training_loss 9.93174


 48%|████▊     | 950/2000 [07:39<09:15,  1.89it/s]

Training_loss 9.92221


 48%|████▊     | 951/2000 [07:39<09:19,  1.87it/s]

Training_loss 9.91780


 48%|████▊     | 952/2000 [07:40<09:36,  1.82it/s]

Training_loss 9.90857


 48%|████▊     | 953/2000 [07:40<09:14,  1.89it/s]

Training_loss 9.90632


 48%|████▊     | 954/2000 [07:41<09:17,  1.88it/s]

Training_loss 9.90094


 48%|████▊     | 955/2000 [07:41<09:23,  1.86it/s]

Training_loss 9.90164


 48%|████▊     | 956/2000 [07:42<09:16,  1.88it/s]

Training_loss 9.89082


 48%|████▊     | 957/2000 [07:42<09:02,  1.92it/s]

Training_loss 9.87929


 48%|████▊     | 958/2000 [07:43<09:03,  1.92it/s]

Training_loss 9.87303


 48%|████▊     | 959/2000 [07:43<08:51,  1.96it/s]

Training_loss 9.87417


 48%|████▊     | 960/2000 [07:44<08:52,  1.95it/s]

Training_loss 9.86725


 48%|████▊     | 961/2000 [07:44<08:18,  2.08it/s]

Training_loss 9.85611


 48%|████▊     | 962/2000 [07:45<08:35,  2.01it/s]

Training_loss 9.84941


 48%|████▊     | 963/2000 [07:45<08:04,  2.14it/s]

Training_loss 9.84587


 48%|████▊     | 964/2000 [07:46<07:54,  2.19it/s]

Training_loss 9.84219


 48%|████▊     | 965/2000 [07:46<08:03,  2.14it/s]

Training_loss 9.83347


 48%|████▊     | 966/2000 [07:47<08:39,  1.99it/s]

Training_loss 9.83246


 48%|████▊     | 967/2000 [07:47<08:59,  1.91it/s]

Training_loss 9.82557


 48%|████▊     | 968/2000 [07:48<08:50,  1.95it/s]

Training_loss 9.82513


 48%|████▊     | 969/2000 [07:48<09:03,  1.90it/s]

Training_loss 9.81785


 48%|████▊     | 970/2000 [07:49<08:41,  1.98it/s]

Training_loss 9.81202


 49%|████▊     | 971/2000 [07:49<08:44,  1.96it/s]

Training_loss 9.80356


 49%|████▊     | 972/2000 [07:50<08:50,  1.94it/s]

Training_loss 9.80689


 49%|████▊     | 973/2000 [07:50<08:31,  2.01it/s]

Training_loss 9.79298


 49%|████▊     | 974/2000 [07:51<08:22,  2.04it/s]

Training_loss 9.78584


 49%|████▉     | 975/2000 [07:51<08:25,  2.03it/s]

Training_loss 9.77583


 49%|████▉     | 976/2000 [07:52<08:23,  2.03it/s]

Training_loss 9.76880


 49%|████▉     | 977/2000 [07:52<07:21,  2.32it/s]

Training_loss 9.76407


 49%|████▉     | 978/2000 [07:53<07:03,  2.42it/s]

Training_loss 9.75741


 49%|████▉     | 979/2000 [07:53<07:31,  2.26it/s]

Training_loss 9.75133


 49%|████▉     | 980/2000 [07:53<07:38,  2.22it/s]

Training_loss 9.75192


 49%|████▉     | 981/2000 [07:54<07:45,  2.19it/s]

Training_loss 9.74060


 49%|████▉     | 982/2000 [07:54<07:42,  2.20it/s]

Training_loss 9.74186


 49%|████▉     | 983/2000 [07:55<07:56,  2.14it/s]

Training_loss 9.73601


 49%|████▉     | 984/2000 [07:55<08:19,  2.03it/s]

Training_loss 9.72421


 49%|████▉     | 985/2000 [07:56<08:07,  2.08it/s]

Training_loss 9.71560


 49%|████▉     | 986/2000 [07:56<08:13,  2.05it/s]

Training_loss 9.71240


 49%|████▉     | 987/2000 [07:57<08:00,  2.11it/s]

Training_loss 9.69424


 49%|████▉     | 988/2000 [07:57<08:11,  2.06it/s]

Training_loss 9.68668


 49%|████▉     | 989/2000 [07:58<08:16,  2.03it/s]

Training_loss 9.68059


 50%|████▉     | 990/2000 [07:58<08:07,  2.07it/s]

Training_loss 9.68160


 50%|████▉     | 991/2000 [07:59<08:00,  2.10it/s]

Training_loss 9.67328


 50%|████▉     | 992/2000 [07:59<08:00,  2.10it/s]

Training_loss 9.67037


 50%|████▉     | 993/2000 [08:00<08:03,  2.08it/s]

Training_loss 9.66645


 50%|████▉     | 994/2000 [08:00<07:59,  2.10it/s]

Training_loss 9.66012


 50%|████▉     | 995/2000 [08:00<06:52,  2.44it/s]

Training_loss 9.65435


 50%|████▉     | 996/2000 [08:01<06:43,  2.49it/s]

Training_loss 9.64824


 50%|████▉     | 997/2000 [08:01<06:19,  2.64it/s]

Training_loss 9.64374


 50%|████▉     | 998/2000 [08:02<06:46,  2.47it/s]

Training_loss 9.63758


 50%|████▉     | 999/2000 [08:02<07:07,  2.34it/s]

Training_loss 9.63199


 50%|█████     | 1000/2000 [08:03<07:23,  2.26it/s]

Training_loss 9.62488


 50%|█████     | 1001/2000 [08:03<07:54,  2.11it/s]

Training_loss 9.61708


 50%|█████     | 1002/2000 [08:04<07:54,  2.10it/s]

Training_loss 9.61323


 50%|█████     | 1003/2000 [08:04<07:55,  2.10it/s]

Training_loss 9.60482


 50%|█████     | 1004/2000 [08:05<07:59,  2.08it/s]

Training_loss 9.59725


 50%|█████     | 1005/2000 [08:05<08:05,  2.05it/s]

Training_loss 9.59211


 50%|█████     | 1006/2000 [08:06<07:57,  2.08it/s]

Training_loss 9.59121


 50%|█████     | 1007/2000 [08:06<08:14,  2.01it/s]

Training_loss 9.58559


 50%|█████     | 1008/2000 [08:07<08:14,  2.00it/s]

Training_loss 9.58072


 50%|█████     | 1009/2000 [08:07<08:06,  2.04it/s]

Training_loss 9.57605


 50%|█████     | 1010/2000 [08:08<08:06,  2.03it/s]

Training_loss 9.57362


 51%|█████     | 1011/2000 [08:08<08:00,  2.06it/s]

Training_loss 9.56832


 51%|█████     | 1012/2000 [08:08<07:47,  2.11it/s]

Training_loss 9.56282


 51%|█████     | 1013/2000 [08:09<07:52,  2.09it/s]

Training_loss 9.56415


 51%|█████     | 1014/2000 [08:10<08:09,  2.02it/s]

Training_loss 9.56103


 51%|█████     | 1015/2000 [08:10<07:57,  2.06it/s]

Training_loss 9.54558


 51%|█████     | 1016/2000 [08:11<08:15,  1.99it/s]

Training_loss 9.53616


 51%|█████     | 1017/2000 [08:11<07:55,  2.07it/s]

Training_loss 9.53642


 51%|█████     | 1018/2000 [08:11<07:59,  2.05it/s]

Training_loss 9.53245


 51%|█████     | 1019/2000 [08:12<07:50,  2.08it/s]

Training_loss 9.52919


 51%|█████     | 1020/2000 [08:12<07:45,  2.11it/s]

Training_loss 9.52519


 51%|█████     | 1021/2000 [08:13<07:45,  2.10it/s]

Training_loss 9.50801


 51%|█████     | 1022/2000 [08:13<07:46,  2.09it/s]

Training_loss 9.51170


 51%|█████     | 1023/2000 [08:14<07:52,  2.07it/s]

Training_loss 9.50264


 51%|█████     | 1024/2000 [08:14<08:06,  2.01it/s]

Training_loss 9.49689


 51%|█████▏    | 1025/2000 [08:15<08:05,  2.01it/s]

Training_loss 9.49105


 51%|█████▏    | 1026/2000 [08:15<07:57,  2.04it/s]

Training_loss 9.48265


 51%|█████▏    | 1027/2000 [08:16<08:07,  1.99it/s]

Training_loss 9.47724


 51%|█████▏    | 1028/2000 [08:16<07:15,  2.23it/s]

Training_loss 9.46821


 51%|█████▏    | 1029/2000 [08:17<07:31,  2.15it/s]

Training_loss 9.46049


 52%|█████▏    | 1030/2000 [08:17<06:52,  2.35it/s]

Training_loss 9.45514


 52%|█████▏    | 1031/2000 [08:17<06:51,  2.36it/s]

Training_loss 9.44601


 52%|█████▏    | 1032/2000 [08:18<07:10,  2.25it/s]

Training_loss 9.43635


 52%|█████▏    | 1033/2000 [08:18<07:31,  2.14it/s]

Training_loss 9.42691


 52%|█████▏    | 1034/2000 [08:19<07:39,  2.10it/s]

Training_loss 9.41777


 52%|█████▏    | 1035/2000 [08:19<07:47,  2.06it/s]

Training_loss 9.41485


 52%|█████▏    | 1036/2000 [08:20<07:47,  2.06it/s]

Training_loss 9.40650


 52%|█████▏    | 1037/2000 [08:20<07:46,  2.07it/s]

Training_loss 9.40128


 52%|█████▏    | 1038/2000 [08:21<07:50,  2.04it/s]

Training_loss 9.39175


 52%|█████▏    | 1039/2000 [08:21<07:43,  2.07it/s]

Training_loss 9.38810


 52%|█████▏    | 1040/2000 [08:22<07:58,  2.01it/s]

Training_loss 9.38976


 52%|█████▏    | 1041/2000 [08:22<08:10,  1.96it/s]

Training_loss 9.38141


 52%|█████▏    | 1042/2000 [08:23<08:03,  1.98it/s]

Training_loss 9.38364


 52%|█████▏    | 1043/2000 [08:23<07:53,  2.02it/s]

Training_loss 9.37547


 52%|█████▏    | 1044/2000 [08:24<08:01,  1.99it/s]

Training_loss 9.37129


 52%|█████▏    | 1045/2000 [08:24<07:50,  2.03it/s]

Training_loss 9.36549


 52%|█████▏    | 1046/2000 [08:25<07:55,  2.01it/s]

Training_loss 9.36496


 52%|█████▏    | 1047/2000 [08:25<08:00,  1.98it/s]

Training_loss 9.35475


 52%|█████▏    | 1048/2000 [08:26<08:11,  1.94it/s]

Training_loss 9.34735


 52%|█████▏    | 1049/2000 [08:27<08:45,  1.81it/s]

Training_loss 9.33596


 52%|█████▎    | 1050/2000 [08:27<08:22,  1.89it/s]

Training_loss 9.33050


 53%|█████▎    | 1051/2000 [08:28<08:14,  1.92it/s]

Training_loss 9.32882


 53%|█████▎    | 1052/2000 [08:28<08:16,  1.91it/s]

Training_loss 9.31805


 53%|█████▎    | 1053/2000 [08:29<08:24,  1.88it/s]

Training_loss 9.31704


 53%|█████▎    | 1054/2000 [08:29<08:28,  1.86it/s]

Training_loss 9.30852


 53%|█████▎    | 1055/2000 [08:30<08:35,  1.83it/s]

Training_loss 9.30553


 53%|█████▎    | 1056/2000 [08:30<08:55,  1.76it/s]

Training_loss 9.29169


 53%|█████▎    | 1057/2000 [08:31<08:49,  1.78it/s]

Training_loss 9.28402


 53%|█████▎    | 1058/2000 [08:31<08:30,  1.84it/s]

Training_loss 9.27874


 53%|█████▎    | 1059/2000 [08:32<08:27,  1.85it/s]

Training_loss 9.27472


 53%|█████▎    | 1060/2000 [08:32<08:04,  1.94it/s]

Training_loss 9.27081


 53%|█████▎    | 1061/2000 [08:33<06:59,  2.24it/s]

Training_loss 9.26881


 53%|█████▎    | 1062/2000 [08:33<07:15,  2.15it/s]

Training_loss 9.26360


 53%|█████▎    | 1063/2000 [08:34<06:31,  2.40it/s]

Training_loss 9.25725


 53%|█████▎    | 1064/2000 [08:34<06:04,  2.57it/s]

Training_loss 9.24792


 53%|█████▎    | 1065/2000 [08:34<06:46,  2.30it/s]

Training_loss 9.25105


 53%|█████▎    | 1066/2000 [08:35<06:51,  2.27it/s]

Training_loss 9.24298


 53%|█████▎    | 1067/2000 [08:35<07:16,  2.14it/s]

Training_loss 9.23604


 53%|█████▎    | 1068/2000 [08:36<07:30,  2.07it/s]

Training_loss 9.23607


 53%|█████▎    | 1069/2000 [08:36<07:13,  2.15it/s]

Training_loss 9.22746


 54%|█████▎    | 1070/2000 [08:37<07:30,  2.07it/s]

Training_loss 9.21182


 54%|█████▎    | 1071/2000 [08:37<08:02,  1.93it/s]

Training_loss 9.20851


 54%|█████▎    | 1072/2000 [08:38<08:05,  1.91it/s]

Training_loss 9.20296


 54%|█████▎    | 1073/2000 [08:39<08:08,  1.90it/s]

Training_loss 9.20375


 54%|█████▎    | 1074/2000 [08:39<08:14,  1.87it/s]

Training_loss 9.19217


 54%|█████▍    | 1075/2000 [08:40<08:05,  1.91it/s]

Training_loss 9.18546


 54%|█████▍    | 1076/2000 [08:40<08:33,  1.80it/s]

Training_loss 9.17521


 54%|█████▍    | 1077/2000 [08:41<08:36,  1.79it/s]

Training_loss 9.16499


 54%|█████▍    | 1078/2000 [08:41<08:32,  1.80it/s]

Training_loss 9.15622


 54%|█████▍    | 1079/2000 [08:42<08:26,  1.82it/s]

Training_loss 9.15409


 54%|█████▍    | 1080/2000 [08:42<08:29,  1.80it/s]

Training_loss 9.14710


 54%|█████▍    | 1081/2000 [08:43<08:24,  1.82it/s]

Training_loss 9.14535


 54%|█████▍    | 1082/2000 [08:44<08:23,  1.82it/s]

Training_loss 9.13922


 54%|█████▍    | 1083/2000 [08:44<08:34,  1.78it/s]

Training_loss 9.13475


 54%|█████▍    | 1084/2000 [08:45<08:34,  1.78it/s]

Training_loss 9.13672


 54%|█████▍    | 1085/2000 [08:45<08:38,  1.77it/s]

Training_loss 9.13178


 54%|█████▍    | 1086/2000 [08:46<08:24,  1.81it/s]

Training_loss 9.12539


 54%|█████▍    | 1087/2000 [08:46<08:22,  1.82it/s]

Training_loss 9.11899


 54%|█████▍    | 1088/2000 [08:47<08:26,  1.80it/s]

Training_loss 9.11451


 54%|█████▍    | 1089/2000 [08:47<08:32,  1.78it/s]

Training_loss 9.10600


 55%|█████▍    | 1090/2000 [08:48<08:24,  1.80it/s]

Training_loss 9.10684


 55%|█████▍    | 1091/2000 [08:48<07:35,  2.00it/s]

Training_loss 9.09328


 55%|█████▍    | 1092/2000 [08:49<07:24,  2.04it/s]

Training_loss 9.09099


 55%|█████▍    | 1093/2000 [08:49<07:45,  1.95it/s]

Training_loss 9.08096


 55%|█████▍    | 1094/2000 [08:50<06:39,  2.27it/s]

Training_loss 9.07459


 55%|█████▍    | 1095/2000 [08:50<06:45,  2.23it/s]

Training_loss 9.06834


 55%|█████▍    | 1096/2000 [08:51<07:05,  2.13it/s]

Training_loss 9.06280


 55%|█████▍    | 1097/2000 [08:51<07:33,  1.99it/s]

Training_loss 9.05792


 55%|█████▍    | 1098/2000 [08:52<07:30,  2.00it/s]

Training_loss 9.04874


 55%|█████▍    | 1099/2000 [08:52<07:43,  1.94it/s]

Training_loss 9.04665


 55%|█████▌    | 1100/2000 [08:53<07:51,  1.91it/s]

Training_loss 9.04416


 55%|█████▌    | 1101/2000 [08:53<08:16,  1.81it/s]

Training_loss 9.04189


 55%|█████▌    | 1102/2000 [08:54<08:09,  1.83it/s]

Training_loss 9.04008


 55%|█████▌    | 1103/2000 [08:54<07:56,  1.88it/s]

Training_loss 9.03636


 55%|█████▌    | 1104/2000 [08:55<07:34,  1.97it/s]

Training_loss 9.03336


 55%|█████▌    | 1105/2000 [08:55<07:22,  2.02it/s]

Training_loss 9.02518


 55%|█████▌    | 1106/2000 [08:56<07:21,  2.02it/s]

Training_loss 9.02659


 55%|█████▌    | 1107/2000 [08:56<07:22,  2.02it/s]

Training_loss 9.02176


 55%|█████▌    | 1108/2000 [08:57<07:47,  1.91it/s]

Training_loss 9.01556


 55%|█████▌    | 1109/2000 [08:58<08:09,  1.82it/s]

Training_loss 9.01375


 56%|█████▌    | 1110/2000 [08:58<07:59,  1.86it/s]

Training_loss 9.00436


 56%|█████▌    | 1111/2000 [08:59<07:54,  1.88it/s]

Training_loss 8.99868


 56%|█████▌    | 1112/2000 [08:59<07:47,  1.90it/s]

Training_loss 8.99339


 56%|█████▌    | 1113/2000 [09:00<07:35,  1.95it/s]

Training_loss 8.98718


 56%|█████▌    | 1114/2000 [09:00<07:53,  1.87it/s]

Training_loss 8.97951


 56%|█████▌    | 1115/2000 [09:01<08:08,  1.81it/s]

Training_loss 8.97244


 56%|█████▌    | 1116/2000 [09:01<08:04,  1.83it/s]

Training_loss 8.95301


 56%|█████▌    | 1117/2000 [09:02<08:37,  1.71it/s]

Training_loss 8.94400


 56%|█████▌    | 1118/2000 [09:03<08:41,  1.69it/s]

Training_loss 8.93847


 56%|█████▌    | 1119/2000 [09:03<08:50,  1.66it/s]

Training_loss 8.93294


 56%|█████▌    | 1120/2000 [09:04<08:31,  1.72it/s]

Training_loss 8.92766


 56%|█████▌    | 1121/2000 [09:04<07:51,  1.86it/s]

Training_loss 8.92358


 56%|█████▌    | 1122/2000 [09:05<07:11,  2.03it/s]

Training_loss 8.91628


 56%|█████▌    | 1123/2000 [09:05<06:54,  2.11it/s]

Training_loss 8.91415


 56%|█████▌    | 1124/2000 [09:05<06:13,  2.34it/s]

Training_loss 8.90570


 56%|█████▋    | 1125/2000 [09:06<06:09,  2.37it/s]

Training_loss 8.90032


 56%|█████▋    | 1126/2000 [09:06<06:46,  2.15it/s]

Training_loss 8.89952


 56%|█████▋    | 1127/2000 [09:07<07:13,  2.01it/s]

Training_loss 8.88875


 56%|█████▋    | 1128/2000 [09:07<07:27,  1.95it/s]

Training_loss 8.88550


 56%|█████▋    | 1129/2000 [09:08<07:45,  1.87it/s]

Training_loss 8.87260


 56%|█████▋    | 1130/2000 [09:09<07:53,  1.84it/s]

Training_loss 8.86557


 57%|█████▋    | 1131/2000 [09:09<08:09,  1.78it/s]

Training_loss 8.86020


 57%|█████▋    | 1132/2000 [09:10<08:44,  1.66it/s]

Training_loss 8.85904


 57%|█████▋    | 1133/2000 [09:10<08:31,  1.69it/s]

Training_loss 8.85162


 57%|█████▋    | 1134/2000 [09:11<08:18,  1.74it/s]

Training_loss 8.84653


 57%|█████▋    | 1135/2000 [09:12<08:10,  1.77it/s]

Training_loss 8.84003


 57%|█████▋    | 1136/2000 [09:12<08:06,  1.77it/s]

Training_loss 8.83192


 57%|█████▋    | 1137/2000 [09:13<07:58,  1.80it/s]

Training_loss 8.83201


 57%|█████▋    | 1138/2000 [09:13<07:55,  1.81it/s]

Training_loss 8.82921


 57%|█████▋    | 1139/2000 [09:14<08:06,  1.77it/s]

Training_loss 8.82383


 57%|█████▋    | 1140/2000 [09:14<08:13,  1.74it/s]

Training_loss 8.81642


 57%|█████▋    | 1141/2000 [09:15<08:11,  1.75it/s]

Training_loss 8.81373


 57%|█████▋    | 1142/2000 [09:16<08:25,  1.70it/s]

Training_loss 8.81509


 57%|█████▋    | 1143/2000 [09:16<08:45,  1.63it/s]

Training_loss 8.81004


 57%|█████▋    | 1144/2000 [09:17<08:42,  1.64it/s]

Training_loss 8.80307


 57%|█████▋    | 1145/2000 [09:17<08:10,  1.74it/s]

Training_loss 8.79847


 57%|█████▋    | 1146/2000 [09:18<08:23,  1.70it/s]

Training_loss 8.78915


 57%|█████▋    | 1147/2000 [09:19<08:30,  1.67it/s]

Training_loss 8.78454


 57%|█████▋    | 1148/2000 [09:19<08:38,  1.64it/s]

Training_loss 8.77624


 57%|█████▋    | 1149/2000 [09:20<08:54,  1.59it/s]

Training_loss 8.77779


 57%|█████▊    | 1150/2000 [09:20<08:15,  1.72it/s]

Training_loss 8.76694


 58%|█████▊    | 1151/2000 [09:21<07:27,  1.90it/s]

Training_loss 8.76545


 58%|█████▊    | 1152/2000 [09:21<07:00,  2.02it/s]

Training_loss 8.75998


 58%|█████▊    | 1153/2000 [09:22<07:03,  2.00it/s]

Training_loss 8.75708


 58%|█████▊    | 1154/2000 [09:22<07:42,  1.83it/s]

Training_loss 8.74914


 58%|█████▊    | 1155/2000 [09:23<07:58,  1.77it/s]

Training_loss 8.74138


 58%|█████▊    | 1156/2000 [09:24<07:56,  1.77it/s]

Training_loss 8.73320


 58%|█████▊    | 1157/2000 [09:24<08:15,  1.70it/s]

Training_loss 8.72592


 58%|█████▊    | 1158/2000 [09:25<08:26,  1.66it/s]

Training_loss 8.72117


 58%|█████▊    | 1159/2000 [09:25<08:10,  1.71it/s]

Training_loss 8.71342


 58%|█████▊    | 1160/2000 [09:26<08:01,  1.74it/s]

Training_loss 8.70418


 58%|█████▊    | 1161/2000 [09:26<07:37,  1.83it/s]

Training_loss 8.69888


 58%|█████▊    | 1162/2000 [09:27<07:25,  1.88it/s]

Training_loss 8.69106


 58%|█████▊    | 1163/2000 [09:27<07:18,  1.91it/s]

Training_loss 8.67793


 58%|█████▊    | 1164/2000 [09:28<07:14,  1.92it/s]

Training_loss 8.67324


 58%|█████▊    | 1165/2000 [09:28<07:24,  1.88it/s]

Training_loss 8.66530


 58%|█████▊    | 1166/2000 [09:29<07:27,  1.86it/s]

Training_loss 8.66668


 58%|█████▊    | 1167/2000 [09:30<07:20,  1.89it/s]

Training_loss 8.66992


 58%|█████▊    | 1168/2000 [09:30<07:16,  1.90it/s]

Training_loss 8.66431


 58%|█████▊    | 1169/2000 [09:31<07:18,  1.90it/s]

Training_loss 8.65252


 58%|█████▊    | 1170/2000 [09:31<07:12,  1.92it/s]

Training_loss 8.65392


 59%|█████▊    | 1171/2000 [09:32<07:26,  1.86it/s]

Training_loss 8.64577


 59%|█████▊    | 1172/2000 [09:32<07:24,  1.86it/s]

Training_loss 8.63648


 59%|█████▊    | 1173/2000 [09:33<07:18,  1.89it/s]

Training_loss 8.63686


 59%|█████▊    | 1174/2000 [09:33<07:06,  1.94it/s]

Training_loss 8.62521


 59%|█████▉    | 1175/2000 [09:34<07:04,  1.94it/s]

Training_loss 8.61900


 59%|█████▉    | 1176/2000 [09:34<07:23,  1.86it/s]

Training_loss 8.60690


 59%|█████▉    | 1177/2000 [09:35<07:23,  1.86it/s]

Training_loss 8.60106


 59%|█████▉    | 1178/2000 [09:35<07:17,  1.88it/s]

Training_loss 8.59713


 59%|█████▉    | 1179/2000 [09:36<07:19,  1.87it/s]

Training_loss 8.59477


 59%|█████▉    | 1180/2000 [09:36<07:08,  1.91it/s]

Training_loss 8.58822


 59%|█████▉    | 1181/2000 [09:37<06:56,  1.97it/s]

Training_loss 8.58275


 59%|█████▉    | 1182/2000 [09:37<06:29,  2.10it/s]

Training_loss 8.58250


 59%|█████▉    | 1183/2000 [09:38<06:34,  2.07it/s]

Training_loss 8.58003


 59%|█████▉    | 1184/2000 [09:38<06:44,  2.01it/s]

Training_loss 8.57336


 59%|█████▉    | 1185/2000 [09:39<06:56,  1.96it/s]

Training_loss 8.56353


 59%|█████▉    | 1186/2000 [09:39<07:11,  1.89it/s]

Training_loss 8.55652


 59%|█████▉    | 1187/2000 [09:40<07:26,  1.82it/s]

Training_loss 8.54905


 59%|█████▉    | 1188/2000 [09:41<07:19,  1.85it/s]

Training_loss 8.54648


 59%|█████▉    | 1189/2000 [09:41<07:18,  1.85it/s]

Training_loss 8.54233


 60%|█████▉    | 1190/2000 [09:42<07:05,  1.90it/s]

Training_loss 8.53278


 60%|█████▉    | 1191/2000 [09:42<07:08,  1.89it/s]

Training_loss 8.53192


 60%|█████▉    | 1192/2000 [09:43<07:15,  1.86it/s]

Training_loss 8.53119


 60%|█████▉    | 1193/2000 [09:43<07:19,  1.84it/s]

Training_loss 8.52148


 60%|█████▉    | 1194/2000 [09:44<07:17,  1.84it/s]

Training_loss 8.52117


 60%|█████▉    | 1195/2000 [09:44<07:18,  1.84it/s]

Training_loss 8.51838


 60%|█████▉    | 1196/2000 [09:45<07:11,  1.86it/s]

Training_loss 8.50700


 60%|█████▉    | 1197/2000 [09:45<07:00,  1.91it/s]

Training_loss 8.50022


 60%|█████▉    | 1198/2000 [09:46<06:52,  1.95it/s]

Training_loss 8.49362


 60%|█████▉    | 1199/2000 [09:46<06:52,  1.94it/s]

Training_loss 8.49146


 60%|██████    | 1200/2000 [09:47<06:53,  1.93it/s]

Training_loss 8.48653


 60%|██████    | 1201/2000 [09:48<07:36,  1.75it/s]

Training_loss 8.48163


 60%|██████    | 1202/2000 [09:48<08:01,  1.66it/s]

Training_loss 8.47925


 60%|██████    | 1203/2000 [09:49<07:43,  1.72it/s]

Training_loss 8.47088


 60%|██████    | 1204/2000 [09:49<07:43,  1.72it/s]

Training_loss 8.46602


 60%|██████    | 1205/2000 [09:50<07:36,  1.74it/s]

Training_loss 8.45477


 60%|██████    | 1206/2000 [09:50<07:26,  1.78it/s]

Training_loss 8.44440


 60%|██████    | 1207/2000 [09:51<07:29,  1.77it/s]

Training_loss 8.44305


 60%|██████    | 1208/2000 [09:52<07:28,  1.77it/s]

Training_loss 8.43466


 60%|██████    | 1209/2000 [09:52<07:18,  1.80it/s]

Training_loss 8.43379


 60%|██████    | 1210/2000 [09:53<06:49,  1.93it/s]

Training_loss 8.43166


 61%|██████    | 1211/2000 [09:53<06:48,  1.93it/s]

Training_loss 8.42633


 61%|██████    | 1212/2000 [09:54<06:43,  1.95it/s]

Training_loss 8.41880


 61%|██████    | 1213/2000 [09:54<06:29,  2.02it/s]

Training_loss 8.41209


 61%|██████    | 1214/2000 [09:55<06:36,  1.98it/s]

Training_loss 8.40511


 61%|██████    | 1215/2000 [09:55<06:36,  1.98it/s]

Training_loss 8.39817


 61%|██████    | 1216/2000 [09:56<06:31,  2.00it/s]

Training_loss 8.38689


 61%|██████    | 1217/2000 [09:56<06:36,  1.97it/s]

Training_loss 8.38369


 61%|██████    | 1218/2000 [09:57<06:37,  1.97it/s]

Training_loss 8.37988


 61%|██████    | 1219/2000 [09:57<06:40,  1.95it/s]

Training_loss 8.37652


 61%|██████    | 1220/2000 [09:58<06:46,  1.92it/s]

Training_loss 8.36852


 61%|██████    | 1221/2000 [09:58<06:49,  1.90it/s]

Training_loss 8.36421


 61%|██████    | 1222/2000 [09:59<06:42,  1.93it/s]

Training_loss 8.36326


 61%|██████    | 1223/2000 [09:59<07:07,  1.82it/s]

Training_loss 8.36025


 61%|██████    | 1224/2000 [10:00<07:14,  1.79it/s]

Training_loss 8.36039


 61%|██████▏   | 1225/2000 [10:00<07:19,  1.76it/s]

Training_loss 8.34964


 61%|██████▏   | 1226/2000 [10:01<07:31,  1.72it/s]

Training_loss 8.34212


 61%|██████▏   | 1227/2000 [10:02<07:37,  1.69it/s]

Training_loss 8.33938


 61%|██████▏   | 1228/2000 [10:02<07:36,  1.69it/s]

Training_loss 8.33151


 61%|██████▏   | 1229/2000 [10:03<07:44,  1.66it/s]

Training_loss 8.33279


 62%|██████▏   | 1230/2000 [10:03<07:29,  1.71it/s]

Training_loss 8.33067


 62%|██████▏   | 1231/2000 [10:04<07:16,  1.76it/s]

Training_loss 8.32336


 62%|██████▏   | 1232/2000 [10:05<07:19,  1.75it/s]

Training_loss 8.30921


 62%|██████▏   | 1233/2000 [10:05<07:27,  1.71it/s]

Training_loss 8.29863


 62%|██████▏   | 1234/2000 [10:06<07:35,  1.68it/s]

Training_loss 8.29299


 62%|██████▏   | 1235/2000 [10:06<07:44,  1.65it/s]

Training_loss 8.28777


 62%|██████▏   | 1236/2000 [10:07<07:37,  1.67it/s]

Training_loss 8.28618


 62%|██████▏   | 1237/2000 [10:08<07:38,  1.67it/s]

Training_loss 8.27827


 62%|██████▏   | 1238/2000 [10:08<07:31,  1.69it/s]

Training_loss 8.27127


 62%|██████▏   | 1239/2000 [10:09<07:15,  1.75it/s]

Training_loss 8.26648


 62%|██████▏   | 1240/2000 [10:09<06:46,  1.87it/s]

Training_loss 8.26405


 62%|██████▏   | 1241/2000 [10:10<06:49,  1.86it/s]

Training_loss 8.26011


 62%|██████▏   | 1242/2000 [10:10<06:34,  1.92it/s]

Training_loss 8.26538


 62%|██████▏   | 1243/2000 [10:11<06:18,  2.00it/s]

Training_loss 8.25799


 62%|██████▏   | 1244/2000 [10:11<06:29,  1.94it/s]

Training_loss 8.25246


 62%|██████▏   | 1245/2000 [10:12<06:26,  1.95it/s]

Training_loss 8.24545


 62%|██████▏   | 1246/2000 [10:12<06:36,  1.90it/s]

Training_loss 8.23848


 62%|██████▏   | 1247/2000 [10:13<06:50,  1.83it/s]

Training_loss 8.23276


 62%|██████▏   | 1248/2000 [10:13<06:51,  1.83it/s]

Training_loss 8.22496


 62%|██████▏   | 1249/2000 [10:14<06:52,  1.82it/s]

Training_loss 8.21717


 62%|██████▎   | 1250/2000 [10:15<07:03,  1.77it/s]

Training_loss 8.21208


 63%|██████▎   | 1251/2000 [10:15<07:07,  1.75it/s]

Training_loss 8.20282


 63%|██████▎   | 1252/2000 [10:16<07:12,  1.73it/s]

Training_loss 8.19892


 63%|██████▎   | 1253/2000 [10:16<07:39,  1.62it/s]

Training_loss 8.19630


 63%|██████▎   | 1254/2000 [10:17<07:23,  1.68it/s]

Training_loss 8.19257


 63%|██████▎   | 1255/2000 [10:18<07:19,  1.69it/s]

Training_loss 8.18291


 63%|██████▎   | 1256/2000 [10:18<07:12,  1.72it/s]

Training_loss 8.17668


 63%|██████▎   | 1257/2000 [10:19<06:59,  1.77it/s]

Training_loss 8.16710


 63%|██████▎   | 1258/2000 [10:19<06:45,  1.83it/s]

Training_loss 8.16028


 63%|██████▎   | 1259/2000 [10:20<06:42,  1.84it/s]

Training_loss 8.15301


 63%|██████▎   | 1260/2000 [10:20<06:31,  1.89it/s]

Training_loss 8.14808


 63%|██████▎   | 1261/2000 [10:21<06:19,  1.95it/s]

Training_loss 8.14594


 63%|██████▎   | 1262/2000 [10:21<06:08,  2.00it/s]

Training_loss 8.14160


 63%|██████▎   | 1263/2000 [10:22<06:06,  2.01it/s]

Training_loss 8.14070


 63%|██████▎   | 1264/2000 [10:22<05:59,  2.04it/s]

Training_loss 8.13391


 63%|██████▎   | 1265/2000 [10:23<06:01,  2.03it/s]

Training_loss 8.12914


 63%|██████▎   | 1266/2000 [10:23<06:09,  1.99it/s]

Training_loss 8.12534


 63%|██████▎   | 1267/2000 [10:24<06:02,  2.02it/s]

Training_loss 8.11945


 63%|██████▎   | 1268/2000 [10:24<05:50,  2.09it/s]

Training_loss 8.11204


 63%|██████▎   | 1269/2000 [10:24<05:07,  2.37it/s]

Training_loss 8.10297


 64%|██████▎   | 1270/2000 [10:25<05:15,  2.31it/s]

Training_loss 8.09463


 64%|██████▎   | 1271/2000 [10:25<05:02,  2.41it/s]

Training_loss 8.08942


 64%|██████▎   | 1272/2000 [10:26<05:15,  2.31it/s]

Training_loss 8.08260


 64%|██████▎   | 1273/2000 [10:26<05:27,  2.22it/s]

Training_loss 8.07592


 64%|██████▎   | 1274/2000 [10:27<05:40,  2.13it/s]

Training_loss 8.06892


 64%|██████▍   | 1275/2000 [10:27<05:50,  2.07it/s]

Training_loss 8.06222


 64%|██████▍   | 1276/2000 [10:28<05:53,  2.05it/s]

Training_loss 8.05786


 64%|██████▍   | 1277/2000 [10:28<05:55,  2.03it/s]

Training_loss 8.05274


 64%|██████▍   | 1278/2000 [10:29<05:59,  2.01it/s]

Training_loss 8.04444


 64%|██████▍   | 1279/2000 [10:29<05:59,  2.00it/s]

Training_loss 8.03858


 64%|██████▍   | 1280/2000 [10:30<06:06,  1.97it/s]

Training_loss 8.03909


 64%|██████▍   | 1281/2000 [10:30<06:17,  1.91it/s]

Training_loss 8.03148


 64%|██████▍   | 1282/2000 [10:31<06:09,  1.94it/s]

Training_loss 8.02442


 64%|██████▍   | 1283/2000 [10:31<06:12,  1.92it/s]

Training_loss 8.02005


 64%|██████▍   | 1284/2000 [10:32<06:17,  1.90it/s]

Training_loss 8.00794


 64%|██████▍   | 1285/2000 [10:32<06:37,  1.80it/s]

Training_loss 8.00198


 64%|██████▍   | 1286/2000 [10:33<06:49,  1.75it/s]

Training_loss 7.99501


 64%|██████▍   | 1287/2000 [10:34<06:48,  1.74it/s]

Training_loss 7.98656


 64%|██████▍   | 1288/2000 [10:34<06:41,  1.77it/s]

Training_loss 7.97872


 64%|██████▍   | 1289/2000 [10:35<06:27,  1.83it/s]

Training_loss 7.97440


 64%|██████▍   | 1290/2000 [10:35<06:37,  1.78it/s]

Training_loss 7.96608


 65%|██████▍   | 1291/2000 [10:36<06:35,  1.79it/s]

Training_loss 7.96145


 65%|██████▍   | 1292/2000 [10:36<06:28,  1.82it/s]

Training_loss 7.96151


 65%|██████▍   | 1293/2000 [10:37<06:30,  1.81it/s]

Training_loss 7.95674


 65%|██████▍   | 1294/2000 [10:37<06:21,  1.85it/s]

Training_loss 7.94963


 65%|██████▍   | 1295/2000 [10:38<06:23,  1.84it/s]

Training_loss 7.94886


 65%|██████▍   | 1296/2000 [10:39<06:29,  1.81it/s]

Training_loss 7.94197


 65%|██████▍   | 1297/2000 [10:39<06:36,  1.77it/s]

Training_loss 7.93814


 65%|██████▍   | 1298/2000 [10:40<06:31,  1.79it/s]

Training_loss 7.93211


 65%|██████▍   | 1299/2000 [10:40<06:11,  1.89it/s]

Training_loss 7.92534


 65%|██████▌   | 1300/2000 [10:40<05:38,  2.07it/s]

Training_loss 7.92078


 65%|██████▌   | 1301/2000 [10:41<05:35,  2.08it/s]

Training_loss 7.91313


 65%|██████▌   | 1302/2000 [10:41<05:37,  2.07it/s]

Training_loss 7.90950


 65%|██████▌   | 1303/2000 [10:42<05:56,  1.95it/s]

Training_loss 7.90991


 65%|██████▌   | 1304/2000 [10:43<05:57,  1.95it/s]

Training_loss 7.90762


 65%|██████▌   | 1305/2000 [10:43<05:52,  1.97it/s]

Training_loss 7.89857


 65%|██████▌   | 1306/2000 [10:44<05:43,  2.02it/s]

Training_loss 7.89222


 65%|██████▌   | 1307/2000 [10:44<05:48,  1.99it/s]

Training_loss 7.89196


 65%|██████▌   | 1308/2000 [10:44<05:15,  2.19it/s]

Training_loss 7.88950


 65%|██████▌   | 1309/2000 [10:45<05:39,  2.04it/s]

Training_loss 7.88624


 66%|██████▌   | 1310/2000 [10:45<05:41,  2.02it/s]

Training_loss 7.87639


 66%|██████▌   | 1311/2000 [10:46<05:43,  2.01it/s]

Training_loss 7.87295


 66%|██████▌   | 1312/2000 [10:46<05:49,  1.97it/s]

Training_loss 7.86814


 66%|██████▌   | 1313/2000 [10:47<05:44,  1.99it/s]

Training_loss 7.86878


 66%|██████▌   | 1314/2000 [10:48<05:51,  1.95it/s]

Training_loss 7.86809


 66%|██████▌   | 1315/2000 [10:48<05:35,  2.04it/s]

Training_loss 7.86353


 66%|██████▌   | 1316/2000 [10:48<05:41,  2.00it/s]

Training_loss 7.86071


 66%|██████▌   | 1317/2000 [10:49<05:56,  1.92it/s]

Training_loss 7.85837


 66%|██████▌   | 1318/2000 [10:50<06:02,  1.88it/s]

Training_loss 7.84838


 66%|██████▌   | 1319/2000 [10:50<06:04,  1.87it/s]

Training_loss 7.84226


 66%|██████▌   | 1320/2000 [10:51<06:17,  1.80it/s]

Training_loss 7.84099


 66%|██████▌   | 1321/2000 [10:51<06:21,  1.78it/s]

Training_loss 7.84112


 66%|██████▌   | 1322/2000 [10:52<06:25,  1.76it/s]

Training_loss 7.83377


 66%|██████▌   | 1323/2000 [10:52<06:20,  1.78it/s]

Training_loss 7.82472


 66%|██████▌   | 1324/2000 [10:53<06:09,  1.83it/s]

Training_loss 7.81832


 66%|██████▋   | 1325/2000 [10:54<06:09,  1.83it/s]

Training_loss 7.80457


 66%|██████▋   | 1326/2000 [10:54<06:05,  1.84it/s]

Training_loss 7.80244


 66%|██████▋   | 1327/2000 [10:55<06:05,  1.84it/s]

Training_loss 7.80069


 66%|██████▋   | 1328/2000 [10:55<06:07,  1.83it/s]

Training_loss 7.79207


 66%|██████▋   | 1329/2000 [10:56<06:00,  1.86it/s]

Training_loss 7.78682


 66%|██████▋   | 1330/2000 [10:56<05:44,  1.94it/s]

Training_loss 7.77708


 67%|██████▋   | 1331/2000 [10:57<05:40,  1.97it/s]

Training_loss 7.77440


 67%|██████▋   | 1332/2000 [10:57<05:59,  1.86it/s]

Training_loss 7.76084


 67%|██████▋   | 1333/2000 [10:58<05:42,  1.95it/s]

Training_loss 7.74959


 67%|██████▋   | 1334/2000 [10:58<05:45,  1.93it/s]

Training_loss 7.74603


 67%|██████▋   | 1335/2000 [10:59<05:43,  1.94it/s]

Training_loss 7.73976


 67%|██████▋   | 1336/2000 [10:59<05:42,  1.94it/s]

Training_loss 7.74015


 67%|██████▋   | 1337/2000 [11:00<05:37,  1.96it/s]

Training_loss 7.73314


 67%|██████▋   | 1338/2000 [11:00<05:52,  1.88it/s]

Training_loss 7.72263


 67%|██████▋   | 1339/2000 [11:01<06:00,  1.83it/s]

Training_loss 7.70989


 67%|██████▋   | 1340/2000 [11:01<05:53,  1.87it/s]

Training_loss 7.70836


 67%|██████▋   | 1341/2000 [11:02<05:55,  1.85it/s]

Training_loss 7.70194


 67%|██████▋   | 1342/2000 [11:03<06:11,  1.77it/s]

Training_loss 7.70197


 67%|██████▋   | 1343/2000 [11:03<06:14,  1.75it/s]

Training_loss 7.69669


 67%|██████▋   | 1344/2000 [11:04<06:05,  1.79it/s]

Training_loss 7.68892


 67%|██████▋   | 1345/2000 [11:04<06:03,  1.80it/s]

Training_loss 7.68348


 67%|██████▋   | 1346/2000 [11:05<06:06,  1.78it/s]

Training_loss 7.67399


 67%|██████▋   | 1347/2000 [11:05<05:59,  1.82it/s]

Training_loss 7.67077


 67%|██████▋   | 1348/2000 [11:06<05:57,  1.82it/s]

Training_loss 7.65868


 67%|██████▋   | 1349/2000 [11:06<05:55,  1.83it/s]

Training_loss 7.65423


 68%|██████▊   | 1350/2000 [11:07<05:56,  1.82it/s]

Training_loss 7.64595


 68%|██████▊   | 1351/2000 [11:08<05:56,  1.82it/s]

Training_loss 7.64080


 68%|██████▊   | 1352/2000 [11:08<05:51,  1.84it/s]

Training_loss 7.63429


 68%|██████▊   | 1353/2000 [11:09<05:35,  1.93it/s]

Training_loss 7.62343


 68%|██████▊   | 1354/2000 [11:09<05:33,  1.94it/s]

Training_loss 7.61814


 68%|██████▊   | 1355/2000 [11:10<05:29,  1.95it/s]

Training_loss 7.60949


 68%|██████▊   | 1356/2000 [11:10<05:26,  1.97it/s]

Training_loss 7.60804


 68%|██████▊   | 1357/2000 [11:10<05:16,  2.03it/s]

Training_loss 7.60404


 68%|██████▊   | 1358/2000 [11:11<05:21,  2.00it/s]

Training_loss 7.60139


 68%|██████▊   | 1359/2000 [11:12<05:23,  1.98it/s]

Training_loss 7.59490


 68%|██████▊   | 1360/2000 [11:12<05:24,  1.97it/s]

Training_loss 7.59585


 68%|██████▊   | 1361/2000 [11:12<05:02,  2.11it/s]

Training_loss 7.59092


 68%|██████▊   | 1362/2000 [11:13<04:51,  2.19it/s]

Training_loss 7.58783


 68%|██████▊   | 1363/2000 [11:13<05:02,  2.11it/s]

Training_loss 7.58698


 68%|██████▊   | 1364/2000 [11:14<04:34,  2.32it/s]

Training_loss 7.57750


 68%|██████▊   | 1365/2000 [11:14<04:54,  2.16it/s]

Training_loss 7.57036


 68%|██████▊   | 1366/2000 [11:15<05:07,  2.06it/s]

Training_loss 7.56045


 68%|██████▊   | 1367/2000 [11:15<05:11,  2.03it/s]

Training_loss 7.55945


 68%|██████▊   | 1368/2000 [11:16<05:12,  2.02it/s]

Training_loss 7.55253


 68%|██████▊   | 1369/2000 [11:16<05:29,  1.92it/s]

Training_loss 7.54825


 68%|██████▊   | 1370/2000 [11:17<05:25,  1.94it/s]

Training_loss 7.54980


 69%|██████▊   | 1371/2000 [11:17<05:24,  1.94it/s]

Training_loss 7.55076


 69%|██████▊   | 1372/2000 [11:18<05:19,  1.97it/s]

Training_loss 7.54622


 69%|██████▊   | 1373/2000 [11:18<05:17,  1.97it/s]

Training_loss 7.53755


 69%|██████▊   | 1374/2000 [11:19<05:05,  2.05it/s]

Training_loss 7.53810


 69%|██████▉   | 1375/2000 [11:19<05:07,  2.03it/s]

Training_loss 7.53393


 69%|██████▉   | 1376/2000 [11:20<04:57,  2.10it/s]

Training_loss 7.53299


 69%|██████▉   | 1377/2000 [11:20<05:06,  2.03it/s]

Training_loss 7.52272


 69%|██████▉   | 1378/2000 [11:21<05:00,  2.07it/s]

Training_loss 7.50863


 69%|██████▉   | 1379/2000 [11:21<05:05,  2.03it/s]

Training_loss 7.49894


 69%|██████▉   | 1380/2000 [11:22<05:04,  2.04it/s]

Training_loss 7.49576


 69%|██████▉   | 1381/2000 [11:22<04:59,  2.07it/s]

Training_loss 7.48759


 69%|██████▉   | 1382/2000 [11:23<04:54,  2.10it/s]

Training_loss 7.48527


 69%|██████▉   | 1383/2000 [11:23<04:54,  2.10it/s]

Training_loss 7.47864


 69%|██████▉   | 1384/2000 [11:24<04:49,  2.12it/s]

Training_loss 7.47273


 69%|██████▉   | 1385/2000 [11:24<04:57,  2.07it/s]

Training_loss 7.46524


 69%|██████▉   | 1386/2000 [11:25<04:49,  2.12it/s]

Training_loss 7.46385


 69%|██████▉   | 1387/2000 [11:25<04:50,  2.11it/s]

Training_loss 7.45433


 69%|██████▉   | 1388/2000 [11:26<04:49,  2.12it/s]

Training_loss 7.45009


 69%|██████▉   | 1389/2000 [11:26<04:49,  2.11it/s]

Training_loss 7.44009


 70%|██████▉   | 1390/2000 [11:27<04:57,  2.05it/s]

Training_loss 7.43644


 70%|██████▉   | 1391/2000 [11:27<04:59,  2.03it/s]

Training_loss 7.43213


 70%|██████▉   | 1392/2000 [11:27<04:49,  2.10it/s]

Training_loss 7.42572


 70%|██████▉   | 1393/2000 [11:28<04:54,  2.06it/s]

Training_loss 7.41098


 70%|██████▉   | 1394/2000 [11:28<04:23,  2.30it/s]

Training_loss 7.40610


 70%|██████▉   | 1395/2000 [11:29<03:55,  2.57it/s]

Training_loss 7.40015


 70%|██████▉   | 1396/2000 [11:29<04:02,  2.49it/s]

Training_loss 7.39353


 70%|██████▉   | 1397/2000 [11:29<03:55,  2.56it/s]

Training_loss 7.38608


 70%|██████▉   | 1398/2000 [11:30<04:12,  2.38it/s]

Training_loss 7.38251


 70%|██████▉   | 1399/2000 [11:30<04:23,  2.28it/s]

Training_loss 7.37733


 70%|███████   | 1400/2000 [11:31<04:28,  2.23it/s]

Training_loss 7.36606


 70%|███████   | 1401/2000 [11:31<04:40,  2.13it/s]

Training_loss 7.36386


 70%|███████   | 1402/2000 [11:32<04:51,  2.05it/s]

Training_loss 7.36014


 70%|███████   | 1403/2000 [11:32<04:55,  2.02it/s]

Training_loss 7.35203


 70%|███████   | 1404/2000 [11:33<04:55,  2.02it/s]

Training_loss 7.34869


 70%|███████   | 1405/2000 [11:33<05:09,  1.92it/s]

Training_loss 7.34101


 70%|███████   | 1406/2000 [11:34<04:54,  2.02it/s]

Training_loss 7.33857


 70%|███████   | 1407/2000 [11:34<04:52,  2.03it/s]

Training_loss 7.33376


 70%|███████   | 1408/2000 [11:35<04:56,  2.00it/s]

Training_loss 7.32363


 70%|███████   | 1409/2000 [11:35<04:59,  1.97it/s]

Training_loss 7.31870


 70%|███████   | 1410/2000 [11:36<04:49,  2.03it/s]

Training_loss 7.30931


 71%|███████   | 1411/2000 [11:36<04:55,  1.99it/s]

Training_loss 7.30024


 71%|███████   | 1412/2000 [11:37<05:04,  1.93it/s]

Training_loss 7.29611


 71%|███████   | 1413/2000 [11:38<05:13,  1.87it/s]

Training_loss 7.28982


 71%|███████   | 1414/2000 [11:38<05:12,  1.87it/s]

Training_loss 7.28249


 71%|███████   | 1415/2000 [11:39<05:20,  1.82it/s]

Training_loss 7.27217


 71%|███████   | 1416/2000 [11:39<05:23,  1.81it/s]

Training_loss 7.26473


 71%|███████   | 1417/2000 [11:40<05:47,  1.68it/s]

Training_loss 7.25876


 71%|███████   | 1418/2000 [11:40<05:41,  1.70it/s]

Training_loss 7.25534


 71%|███████   | 1419/2000 [11:41<05:32,  1.75it/s]

Training_loss 7.25047


 71%|███████   | 1420/2000 [11:42<05:36,  1.72it/s]

Training_loss 7.24923


 71%|███████   | 1421/2000 [11:42<05:45,  1.67it/s]

Training_loss 7.24587


 71%|███████   | 1422/2000 [11:43<05:35,  1.72it/s]

Training_loss 7.24014


 71%|███████   | 1423/2000 [11:43<05:34,  1.73it/s]

Training_loss 7.23221


 71%|███████   | 1424/2000 [11:44<05:30,  1.74it/s]

Training_loss 7.22736


 71%|███████▏  | 1425/2000 [11:44<04:42,  2.04it/s]

Training_loss 7.21874


 71%|███████▏  | 1426/2000 [11:45<04:20,  2.20it/s]

Training_loss 7.21803


 71%|███████▏  | 1427/2000 [11:45<04:39,  2.05it/s]

Training_loss 7.21250


 71%|███████▏  | 1428/2000 [11:46<04:45,  2.01it/s]

Training_loss 7.20549


 71%|███████▏  | 1429/2000 [11:46<04:57,  1.92it/s]

Training_loss 7.19664


 72%|███████▏  | 1430/2000 [11:47<05:08,  1.85it/s]

Training_loss 7.19515


 72%|███████▏  | 1431/2000 [11:47<05:07,  1.85it/s]

Training_loss 7.19367


 72%|███████▏  | 1432/2000 [11:48<04:57,  1.91it/s]

Training_loss 7.18270


 72%|███████▏  | 1433/2000 [11:48<04:51,  1.94it/s]

Training_loss 7.17618


 72%|███████▏  | 1434/2000 [11:49<05:01,  1.88it/s]

Training_loss 7.17495


 72%|███████▏  | 1435/2000 [11:49<04:55,  1.91it/s]

Training_loss 7.16615


 72%|███████▏  | 1436/2000 [11:50<04:58,  1.89it/s]

Training_loss 7.16378


 72%|███████▏  | 1437/2000 [11:50<04:48,  1.95it/s]

Training_loss 7.16149


 72%|███████▏  | 1438/2000 [11:51<04:49,  1.94it/s]

Training_loss 7.15620


 72%|███████▏  | 1439/2000 [11:52<04:59,  1.87it/s]

Training_loss 7.14893


 72%|███████▏  | 1440/2000 [11:52<05:27,  1.71it/s]

Training_loss 7.14696


 72%|███████▏  | 1441/2000 [11:53<05:14,  1.78it/s]

Training_loss 7.14707


 72%|███████▏  | 1442/2000 [11:53<04:58,  1.87it/s]

Training_loss 7.14462


 72%|███████▏  | 1443/2000 [11:54<04:48,  1.93it/s]

Training_loss 7.13776


 72%|███████▏  | 1444/2000 [11:54<04:40,  1.98it/s]

Training_loss 7.13376


 72%|███████▏  | 1445/2000 [11:55<04:43,  1.96it/s]

Training_loss 7.12811


 72%|███████▏  | 1446/2000 [11:55<04:50,  1.91it/s]

Training_loss 7.12512


 72%|███████▏  | 1447/2000 [11:56<04:50,  1.90it/s]

Training_loss 7.12195


 72%|███████▏  | 1448/2000 [11:56<04:51,  1.89it/s]

Training_loss 7.12062


 72%|███████▏  | 1449/2000 [11:57<04:42,  1.95it/s]

Training_loss 7.11751


 72%|███████▎  | 1450/2000 [11:57<04:37,  1.98it/s]

Training_loss 7.11812


 73%|███████▎  | 1451/2000 [11:58<04:36,  1.98it/s]

Training_loss 7.10862


 73%|███████▎  | 1452/2000 [11:58<04:22,  2.09it/s]

Training_loss 7.10320


 73%|███████▎  | 1453/2000 [11:59<04:28,  2.04it/s]

Training_loss 7.09551


 73%|███████▎  | 1454/2000 [11:59<04:25,  2.06it/s]

Training_loss 7.08988


 73%|███████▎  | 1455/2000 [12:00<04:30,  2.01it/s]

Training_loss 7.08470


 73%|███████▎  | 1456/2000 [12:00<04:25,  2.05it/s]

Training_loss 7.08333


 73%|███████▎  | 1457/2000 [12:01<04:12,  2.15it/s]

Training_loss 7.07749


 73%|███████▎  | 1458/2000 [12:01<04:05,  2.21it/s]

Training_loss 7.07335


 73%|███████▎  | 1459/2000 [12:01<03:38,  2.47it/s]

Training_loss 7.06838


 73%|███████▎  | 1460/2000 [12:02<03:41,  2.44it/s]

Training_loss 7.06256


 73%|███████▎  | 1461/2000 [12:02<03:55,  2.29it/s]

Training_loss 7.05930


 73%|███████▎  | 1462/2000 [12:03<04:09,  2.15it/s]

Training_loss 7.05277


 73%|███████▎  | 1463/2000 [12:03<04:20,  2.06it/s]

Training_loss 7.05064


 73%|███████▎  | 1464/2000 [12:04<04:26,  2.01it/s]

Training_loss 7.04191


 73%|███████▎  | 1465/2000 [12:04<04:35,  1.94it/s]

Training_loss 7.03402


 73%|███████▎  | 1466/2000 [12:05<04:26,  2.00it/s]

Training_loss 7.02867


 73%|███████▎  | 1467/2000 [12:05<04:25,  2.01it/s]

Training_loss 7.02529


 73%|███████▎  | 1468/2000 [12:06<04:17,  2.06it/s]

Training_loss 7.01882


 73%|███████▎  | 1469/2000 [12:06<04:17,  2.06it/s]

Training_loss 7.02004


 74%|███████▎  | 1470/2000 [12:07<04:23,  2.01it/s]

Training_loss 7.01368


 74%|███████▎  | 1471/2000 [12:07<04:25,  1.99it/s]

Training_loss 7.00658


 74%|███████▎  | 1472/2000 [12:08<04:21,  2.02it/s]

Training_loss 6.99126


 74%|███████▎  | 1473/2000 [12:08<04:19,  2.03it/s]

Training_loss 6.98961


 74%|███████▎  | 1474/2000 [12:09<04:23,  1.99it/s]

Training_loss 6.98474


 74%|███████▍  | 1475/2000 [12:09<04:21,  2.00it/s]

Training_loss 6.97698


 74%|███████▍  | 1476/2000 [12:10<04:18,  2.03it/s]

Training_loss 6.97461


 74%|███████▍  | 1477/2000 [12:10<04:21,  2.00it/s]

Training_loss 6.96899


 74%|███████▍  | 1478/2000 [12:11<04:15,  2.04it/s]

Training_loss 6.96659


 74%|███████▍  | 1479/2000 [12:11<04:09,  2.08it/s]

Training_loss 6.96044


 74%|███████▍  | 1480/2000 [12:12<04:07,  2.10it/s]

Training_loss 6.95394


 74%|███████▍  | 1481/2000 [12:12<04:00,  2.16it/s]

Training_loss 6.94992


 74%|███████▍  | 1482/2000 [12:13<04:10,  2.07it/s]

Training_loss 6.94635


 74%|███████▍  | 1483/2000 [12:13<04:19,  1.99it/s]

Training_loss 6.93623


 74%|███████▍  | 1484/2000 [12:14<04:14,  2.03it/s]

Training_loss 6.93049


 74%|███████▍  | 1485/2000 [12:14<04:20,  1.98it/s]

Training_loss 6.92428


 74%|███████▍  | 1486/2000 [12:15<04:14,  2.02it/s]

Training_loss 6.91455


 74%|███████▍  | 1487/2000 [12:15<04:12,  2.04it/s]

Training_loss 6.91881


 74%|███████▍  | 1488/2000 [12:16<04:14,  2.01it/s]

Training_loss 6.90692


 74%|███████▍  | 1489/2000 [12:16<03:58,  2.14it/s]

Training_loss 6.90219


 74%|███████▍  | 1490/2000 [12:16<03:31,  2.41it/s]

Training_loss 6.90019


 75%|███████▍  | 1491/2000 [12:17<03:24,  2.49it/s]

Training_loss 6.89709


 75%|███████▍  | 1492/2000 [12:17<03:16,  2.59it/s]

Training_loss 6.89129


 75%|███████▍  | 1493/2000 [12:17<03:12,  2.63it/s]

Training_loss 6.89071


 75%|███████▍  | 1494/2000 [12:18<03:32,  2.38it/s]

Training_loss 6.88153


 75%|███████▍  | 1495/2000 [12:18<03:43,  2.26it/s]

Training_loss 6.87196


 75%|███████▍  | 1496/2000 [12:19<03:56,  2.13it/s]

Training_loss 6.86342


 75%|███████▍  | 1497/2000 [12:19<04:04,  2.06it/s]

Training_loss 6.85583


 75%|███████▍  | 1498/2000 [12:20<04:08,  2.02it/s]

Training_loss 6.85145


 75%|███████▍  | 1499/2000 [12:20<04:05,  2.04it/s]

Training_loss 6.84900


 75%|███████▌  | 1500/2000 [12:21<04:07,  2.02it/s]

Training_loss 6.84013


 75%|███████▌  | 1501/2000 [12:22<04:19,  1.92it/s]

Training_loss 6.83913


 75%|███████▌  | 1502/2000 [12:22<04:24,  1.88it/s]

Training_loss 6.82947


 75%|███████▌  | 1503/2000 [12:23<04:10,  1.99it/s]

Training_loss 6.82430


 75%|███████▌  | 1504/2000 [12:23<04:15,  1.94it/s]

Training_loss 6.82064


 75%|███████▌  | 1505/2000 [12:24<04:10,  1.98it/s]

Training_loss 6.81667


 75%|███████▌  | 1506/2000 [12:24<04:11,  1.96it/s]

Training_loss 6.80762


 75%|███████▌  | 1507/2000 [12:25<04:11,  1.96it/s]

Training_loss 6.80310


 75%|███████▌  | 1508/2000 [12:25<04:08,  1.98it/s]

Training_loss 6.79623


 75%|███████▌  | 1509/2000 [12:26<04:02,  2.03it/s]

Training_loss 6.79389


 76%|███████▌  | 1510/2000 [12:26<04:07,  1.98it/s]

Training_loss 6.78897


 76%|███████▌  | 1511/2000 [12:27<03:57,  2.06it/s]

Training_loss 6.77863


 76%|███████▌  | 1512/2000 [12:27<03:52,  2.10it/s]

Training_loss 6.77066


 76%|███████▌  | 1513/2000 [12:28<03:56,  2.06it/s]

Training_loss 6.76809


 76%|███████▌  | 1514/2000 [12:28<04:04,  1.99it/s]

Training_loss 6.75929


 76%|███████▌  | 1515/2000 [12:29<03:59,  2.02it/s]

Training_loss 6.75957


 76%|███████▌  | 1516/2000 [12:29<03:57,  2.04it/s]

Training_loss 6.75559


 76%|███████▌  | 1517/2000 [12:29<03:50,  2.10it/s]

Training_loss 6.74942


 76%|███████▌  | 1518/2000 [12:30<03:50,  2.09it/s]

Training_loss 6.74681


 76%|███████▌  | 1519/2000 [12:30<03:55,  2.04it/s]

Training_loss 6.74006


 76%|███████▌  | 1520/2000 [12:31<03:51,  2.08it/s]

Training_loss 6.73369


 76%|███████▌  | 1521/2000 [12:31<03:53,  2.05it/s]

Training_loss 6.72658


 76%|███████▌  | 1522/2000 [12:32<03:52,  2.05it/s]

Training_loss 6.71804


 76%|███████▌  | 1523/2000 [12:32<03:40,  2.16it/s]

Training_loss 6.71263


 76%|███████▌  | 1524/2000 [12:33<03:10,  2.49it/s]

Training_loss 6.70056


 76%|███████▋  | 1525/2000 [12:33<03:17,  2.41it/s]

Training_loss 6.69818


 76%|███████▋  | 1526/2000 [12:33<03:15,  2.43it/s]

Training_loss 6.69883


 76%|███████▋  | 1527/2000 [12:34<03:02,  2.59it/s]

Training_loss 6.69063


 76%|███████▋  | 1528/2000 [12:34<03:14,  2.43it/s]

Training_loss 6.68295


 76%|███████▋  | 1529/2000 [12:35<03:27,  2.27it/s]

Training_loss 6.67294


 76%|███████▋  | 1530/2000 [12:35<03:34,  2.20it/s]

Training_loss 6.66917


 77%|███████▋  | 1531/2000 [12:36<03:38,  2.15it/s]

Training_loss 6.66807


 77%|███████▋  | 1532/2000 [12:36<03:38,  2.14it/s]

Training_loss 6.66173


 77%|███████▋  | 1533/2000 [12:37<03:45,  2.07it/s]

Training_loss 6.65502


 77%|███████▋  | 1534/2000 [12:37<03:37,  2.14it/s]

Training_loss 6.64478


 77%|███████▋  | 1535/2000 [12:38<03:37,  2.13it/s]

Training_loss 6.63802


 77%|███████▋  | 1536/2000 [12:38<03:38,  2.12it/s]

Training_loss 6.63413


 77%|███████▋  | 1537/2000 [12:39<03:46,  2.05it/s]

Training_loss 6.62784


 77%|███████▋  | 1538/2000 [12:39<03:42,  2.08it/s]

Training_loss 6.62111


 77%|███████▋  | 1539/2000 [12:40<03:35,  2.14it/s]

Training_loss 6.60741


 77%|███████▋  | 1540/2000 [12:40<03:50,  2.00it/s]

Training_loss 6.60594


 77%|███████▋  | 1541/2000 [12:41<03:44,  2.04it/s]

Training_loss 6.60503


 77%|███████▋  | 1542/2000 [12:41<03:39,  2.08it/s]

Training_loss 6.60588


 77%|███████▋  | 1543/2000 [12:42<03:42,  2.06it/s]

Training_loss 6.60223


 77%|███████▋  | 1544/2000 [12:42<03:35,  2.12it/s]

Training_loss 6.59290


 77%|███████▋  | 1545/2000 [12:42<03:37,  2.09it/s]

Training_loss 6.58341


 77%|███████▋  | 1546/2000 [12:43<03:40,  2.06it/s]

Training_loss 6.57630


 77%|███████▋  | 1547/2000 [12:43<03:35,  2.10it/s]

Training_loss 6.56978


 77%|███████▋  | 1548/2000 [12:44<03:31,  2.14it/s]

Training_loss 6.56213


 77%|███████▋  | 1549/2000 [12:44<03:32,  2.12it/s]

Training_loss 6.55763


 78%|███████▊  | 1550/2000 [12:45<03:33,  2.11it/s]

Training_loss 6.54939


 78%|███████▊  | 1551/2000 [12:45<03:36,  2.07it/s]

Training_loss 6.54053


 78%|███████▊  | 1552/2000 [12:46<03:31,  2.12it/s]

Training_loss 6.53204


 78%|███████▊  | 1553/2000 [12:46<03:34,  2.09it/s]

Training_loss 6.53129


 78%|███████▊  | 1554/2000 [12:47<03:29,  2.13it/s]

Training_loss 6.52552


 78%|███████▊  | 1555/2000 [12:47<03:32,  2.09it/s]

Training_loss 6.51616


 78%|███████▊  | 1556/2000 [12:48<03:30,  2.11it/s]

Training_loss 6.51078


 78%|███████▊  | 1557/2000 [12:48<03:33,  2.07it/s]

Training_loss 6.50397


 78%|███████▊  | 1558/2000 [12:48<03:12,  2.30it/s]

Training_loss 6.50185


 78%|███████▊  | 1559/2000 [12:49<03:05,  2.38it/s]

Training_loss 6.49934


 78%|███████▊  | 1560/2000 [12:49<03:03,  2.39it/s]

Training_loss 6.49235


 78%|███████▊  | 1561/2000 [12:50<02:50,  2.57it/s]

Training_loss 6.48856


 78%|███████▊  | 1562/2000 [12:50<02:47,  2.61it/s]

Training_loss 6.48278


 78%|███████▊  | 1563/2000 [12:50<03:05,  2.36it/s]

Training_loss 6.47624


 78%|███████▊  | 1564/2000 [12:51<03:13,  2.26it/s]

Training_loss 6.47653


 78%|███████▊  | 1565/2000 [12:51<03:14,  2.24it/s]

Training_loss 6.47228


 78%|███████▊  | 1566/2000 [12:52<03:12,  2.26it/s]

Training_loss 6.46681


 78%|███████▊  | 1567/2000 [12:52<03:23,  2.13it/s]

Training_loss 6.46385


 78%|███████▊  | 1568/2000 [12:53<03:31,  2.04it/s]

Training_loss 6.45790


 78%|███████▊  | 1569/2000 [12:53<03:39,  1.97it/s]

Training_loss 6.45371


 78%|███████▊  | 1570/2000 [12:54<03:37,  1.98it/s]

Training_loss 6.44926


 79%|███████▊  | 1571/2000 [12:55<03:38,  1.97it/s]

Training_loss 6.44683


 79%|███████▊  | 1572/2000 [12:55<03:38,  1.96it/s]

Training_loss 6.43316


 79%|███████▊  | 1573/2000 [12:55<03:32,  2.01it/s]

Training_loss 6.42690


 79%|███████▊  | 1574/2000 [12:56<03:32,  2.00it/s]

Training_loss 6.42391


 79%|███████▉  | 1575/2000 [12:56<03:29,  2.03it/s]

Training_loss 6.41989


 79%|███████▉  | 1576/2000 [12:57<03:26,  2.05it/s]

Training_loss 6.41621


 79%|███████▉  | 1577/2000 [12:57<03:20,  2.11it/s]

Training_loss 6.41747


 79%|███████▉  | 1578/2000 [12:58<03:26,  2.05it/s]

Training_loss 6.41672


 79%|███████▉  | 1579/2000 [12:58<03:31,  1.99it/s]

Training_loss 6.41121


 79%|███████▉  | 1580/2000 [12:59<03:32,  1.98it/s]

Training_loss 6.41144


 79%|███████▉  | 1581/2000 [12:59<03:28,  2.01it/s]

Training_loss 6.40442


 79%|███████▉  | 1582/2000 [13:00<03:26,  2.03it/s]

Training_loss 6.39977


 79%|███████▉  | 1583/2000 [13:00<03:21,  2.07it/s]

Training_loss 6.38894


 79%|███████▉  | 1584/2000 [13:01<03:29,  1.99it/s]

Training_loss 6.38919


 79%|███████▉  | 1585/2000 [13:01<03:22,  2.05it/s]

Training_loss 6.38474


 79%|███████▉  | 1586/2000 [13:02<03:25,  2.01it/s]

Training_loss 6.37629


 79%|███████▉  | 1587/2000 [13:02<03:23,  2.03it/s]

Training_loss 6.37057


 79%|███████▉  | 1588/2000 [13:03<03:19,  2.07it/s]

Training_loss 6.36357


 79%|███████▉  | 1589/2000 [13:03<03:19,  2.06it/s]

Training_loss 6.35915


 80%|███████▉  | 1590/2000 [13:04<03:22,  2.03it/s]

Training_loss 6.35619


 80%|███████▉  | 1591/2000 [13:04<03:18,  2.06it/s]

Training_loss 6.35372


 80%|███████▉  | 1592/2000 [13:05<03:18,  2.05it/s]

Training_loss 6.35107


 80%|███████▉  | 1593/2000 [13:05<03:02,  2.23it/s]

Training_loss 6.35016


 80%|███████▉  | 1594/2000 [13:06<02:54,  2.32it/s]

Training_loss 6.34465


 80%|███████▉  | 1595/2000 [13:06<03:01,  2.23it/s]

Training_loss 6.33574


 80%|███████▉  | 1596/2000 [13:06<02:51,  2.36it/s]

Training_loss 6.33537


 80%|███████▉  | 1597/2000 [13:07<02:51,  2.35it/s]

Training_loss 6.33290


 80%|███████▉  | 1598/2000 [13:07<03:00,  2.23it/s]

Training_loss 6.33391


 80%|███████▉  | 1599/2000 [13:08<03:14,  2.06it/s]

Training_loss 6.33454


 80%|████████  | 1600/2000 [13:08<03:14,  2.06it/s]

Training_loss 6.33245


 80%|████████  | 1601/2000 [13:09<03:11,  2.08it/s]

Training_loss 6.32736


 80%|████████  | 1602/2000 [13:09<03:16,  2.03it/s]

Training_loss 6.32316


 80%|████████  | 1603/2000 [13:10<03:21,  1.97it/s]

Training_loss 6.31725


 80%|████████  | 1604/2000 [13:10<03:16,  2.01it/s]

Training_loss 6.30906


 80%|████████  | 1605/2000 [13:11<03:12,  2.05it/s]

Training_loss 6.30766


 80%|████████  | 1606/2000 [13:11<03:08,  2.09it/s]

Training_loss 6.30236


 80%|████████  | 1607/2000 [13:12<03:05,  2.12it/s]

Training_loss 6.29812


 80%|████████  | 1608/2000 [13:12<03:09,  2.07it/s]

Training_loss 6.29555


 80%|████████  | 1609/2000 [13:13<03:04,  2.12it/s]

Training_loss 6.28944


 80%|████████  | 1610/2000 [13:13<03:09,  2.06it/s]

Training_loss 6.28735


 81%|████████  | 1611/2000 [13:14<03:07,  2.07it/s]

Training_loss 6.28379


 81%|████████  | 1612/2000 [13:14<03:04,  2.10it/s]

Training_loss 6.27807


 81%|████████  | 1613/2000 [13:15<03:05,  2.08it/s]

Training_loss 6.27356


 81%|████████  | 1614/2000 [13:15<03:04,  2.09it/s]

Training_loss 6.26588


 81%|████████  | 1615/2000 [13:16<03:06,  2.07it/s]

Training_loss 6.26187


 81%|████████  | 1616/2000 [13:16<03:01,  2.12it/s]

Training_loss 6.25505


 81%|████████  | 1617/2000 [13:17<03:01,  2.11it/s]

Training_loss 6.25104


 81%|████████  | 1618/2000 [13:17<02:59,  2.13it/s]

Training_loss 6.24623


 81%|████████  | 1619/2000 [13:17<02:56,  2.16it/s]

Training_loss 6.24079


 81%|████████  | 1620/2000 [13:18<02:57,  2.14it/s]

Training_loss 6.23973


 81%|████████  | 1621/2000 [13:18<03:00,  2.11it/s]

Training_loss 6.23675


 81%|████████  | 1622/2000 [13:19<03:03,  2.06it/s]

Training_loss 6.23670


 81%|████████  | 1623/2000 [13:19<03:03,  2.05it/s]

Training_loss 6.23653


 81%|████████  | 1624/2000 [13:20<02:59,  2.10it/s]

Training_loss 6.22828


 81%|████████▏ | 1625/2000 [13:20<02:47,  2.24it/s]

Training_loss 6.22530


 81%|████████▏ | 1626/2000 [13:21<02:28,  2.51it/s]

Training_loss 6.21889


 81%|████████▏ | 1627/2000 [13:21<02:35,  2.40it/s]

Training_loss 6.21183


 81%|████████▏ | 1628/2000 [13:21<02:37,  2.36it/s]

Training_loss 6.20724


 81%|████████▏ | 1629/2000 [13:22<02:31,  2.45it/s]

Training_loss 6.20416


 82%|████████▏ | 1630/2000 [13:22<02:40,  2.31it/s]

Training_loss 6.19472


 82%|████████▏ | 1631/2000 [13:23<02:45,  2.23it/s]

Training_loss 6.18963


 82%|████████▏ | 1632/2000 [13:23<02:54,  2.11it/s]

Training_loss 6.18459


 82%|████████▏ | 1633/2000 [13:24<02:59,  2.04it/s]

Training_loss 6.18075


 82%|████████▏ | 1634/2000 [13:24<03:00,  2.03it/s]

Training_loss 6.17612


 82%|████████▏ | 1635/2000 [13:25<03:00,  2.02it/s]

Training_loss 6.16997


 82%|████████▏ | 1636/2000 [13:25<02:51,  2.12it/s]

Training_loss 6.16552


 82%|████████▏ | 1637/2000 [13:26<02:51,  2.12it/s]

Training_loss 6.16441


 82%|████████▏ | 1638/2000 [13:26<02:52,  2.10it/s]

Training_loss 6.15523


 82%|████████▏ | 1639/2000 [13:27<02:47,  2.16it/s]

Training_loss 6.15156


 82%|████████▏ | 1640/2000 [13:27<02:47,  2.14it/s]

Training_loss 6.14961


 82%|████████▏ | 1641/2000 [13:28<02:50,  2.11it/s]

Training_loss 6.14258


 82%|████████▏ | 1642/2000 [13:28<02:47,  2.13it/s]

Training_loss 6.14111


 82%|████████▏ | 1643/2000 [13:29<02:49,  2.11it/s]

Training_loss 6.13930


 82%|████████▏ | 1644/2000 [13:29<02:46,  2.14it/s]

Training_loss 6.13655


 82%|████████▏ | 1645/2000 [13:30<02:45,  2.14it/s]

Training_loss 6.13220


 82%|████████▏ | 1646/2000 [13:30<02:45,  2.14it/s]

Training_loss 6.12871


 82%|████████▏ | 1647/2000 [13:30<02:43,  2.16it/s]

Training_loss 6.12658


 82%|████████▏ | 1648/2000 [13:31<02:41,  2.18it/s]

Training_loss 6.11779


 82%|████████▏ | 1649/2000 [13:31<02:41,  2.18it/s]

Training_loss 6.11414


 82%|████████▎ | 1650/2000 [13:32<02:43,  2.14it/s]

Training_loss 6.11327


 83%|████████▎ | 1651/2000 [13:32<02:42,  2.15it/s]

Training_loss 6.11281


 83%|████████▎ | 1652/2000 [13:33<02:39,  2.19it/s]

Training_loss 6.11144


 83%|████████▎ | 1653/2000 [13:33<02:44,  2.10it/s]

Training_loss 6.10908


 83%|████████▎ | 1654/2000 [13:34<02:46,  2.08it/s]

Training_loss 6.10776


 83%|████████▎ | 1655/2000 [13:34<02:47,  2.06it/s]

Training_loss 6.10965


 83%|████████▎ | 1656/2000 [13:35<02:47,  2.05it/s]

Training_loss 6.10548


 83%|████████▎ | 1657/2000 [13:35<02:44,  2.09it/s]

Training_loss 6.10429


 83%|████████▎ | 1658/2000 [13:36<02:46,  2.05it/s]

Training_loss 6.10227


 83%|████████▎ | 1659/2000 [13:36<02:39,  2.14it/s]

Training_loss 6.09760


 83%|████████▎ | 1660/2000 [13:36<02:24,  2.35it/s]

Training_loss 6.09979


 83%|████████▎ | 1661/2000 [13:37<02:19,  2.43it/s]

Training_loss 6.09402


 83%|████████▎ | 1662/2000 [13:37<02:11,  2.57it/s]

Training_loss 6.09146


 83%|████████▎ | 1663/2000 [13:38<02:14,  2.50it/s]

Training_loss 6.09141


 83%|████████▎ | 1664/2000 [13:38<02:18,  2.42it/s]

Training_loss 6.09174


 83%|████████▎ | 1665/2000 [13:39<02:29,  2.24it/s]

Training_loss 6.09027


 83%|████████▎ | 1666/2000 [13:39<02:29,  2.23it/s]

Training_loss 6.08711


 83%|████████▎ | 1667/2000 [13:39<02:29,  2.22it/s]

Training_loss 6.08427


 83%|████████▎ | 1668/2000 [13:40<02:29,  2.22it/s]

Training_loss 6.08028


 83%|████████▎ | 1669/2000 [13:40<02:30,  2.20it/s]

Training_loss 6.08151


 84%|████████▎ | 1670/2000 [13:41<02:27,  2.23it/s]

Training_loss 6.08019


 84%|████████▎ | 1671/2000 [13:41<02:29,  2.21it/s]

Training_loss 6.07432


 84%|████████▎ | 1672/2000 [13:42<02:30,  2.18it/s]

Training_loss 6.06684


 84%|████████▎ | 1673/2000 [13:42<02:26,  2.22it/s]

Training_loss 6.05259


 84%|████████▎ | 1674/2000 [13:43<02:28,  2.19it/s]

Training_loss 6.04935


 84%|████████▍ | 1675/2000 [13:43<02:20,  2.31it/s]

Training_loss 6.04952


 84%|████████▍ | 1676/2000 [13:44<02:34,  2.09it/s]

Training_loss 6.04452


 84%|████████▍ | 1677/2000 [13:44<02:26,  2.20it/s]

Training_loss 6.04341


 84%|████████▍ | 1678/2000 [13:44<02:20,  2.28it/s]

Training_loss 6.04042


 84%|████████▍ | 1679/2000 [13:45<02:23,  2.24it/s]

Training_loss 6.04119


 84%|████████▍ | 1680/2000 [13:45<02:27,  2.18it/s]

Training_loss 6.03422


 84%|████████▍ | 1681/2000 [13:46<02:32,  2.09it/s]

Training_loss 6.03253


 84%|████████▍ | 1682/2000 [13:46<02:32,  2.08it/s]

Training_loss 6.02805


 84%|████████▍ | 1683/2000 [13:47<02:31,  2.09it/s]

Training_loss 6.02251


 84%|████████▍ | 1684/2000 [13:47<02:29,  2.12it/s]

Training_loss 6.01842


 84%|████████▍ | 1685/2000 [13:48<02:32,  2.07it/s]

Training_loss 6.01073


 84%|████████▍ | 1686/2000 [13:48<02:29,  2.10it/s]

Training_loss 6.00659


 84%|████████▍ | 1687/2000 [13:49<02:35,  2.01it/s]

Training_loss 6.00006


 84%|████████▍ | 1688/2000 [13:49<02:37,  1.98it/s]

Training_loss 5.99836


 84%|████████▍ | 1689/2000 [13:50<02:33,  2.03it/s]

Training_loss 5.99579


 84%|████████▍ | 1690/2000 [13:50<02:36,  1.98it/s]

Training_loss 5.99234


 85%|████████▍ | 1691/2000 [13:51<02:35,  1.99it/s]

Training_loss 5.98861


 85%|████████▍ | 1692/2000 [13:51<02:34,  2.00it/s]

Training_loss 5.98361


 85%|████████▍ | 1693/2000 [13:52<02:30,  2.04it/s]

Training_loss 5.97904


 85%|████████▍ | 1694/2000 [13:52<02:20,  2.17it/s]

Training_loss 5.97863


 85%|████████▍ | 1695/2000 [13:53<02:07,  2.39it/s]

Training_loss 5.97558


 85%|████████▍ | 1696/2000 [13:53<02:19,  2.19it/s]

Training_loss 5.97115


 85%|████████▍ | 1697/2000 [13:54<02:22,  2.13it/s]

Training_loss 5.96786


 85%|████████▍ | 1698/2000 [13:54<02:15,  2.23it/s]

Training_loss 5.96286


 85%|████████▍ | 1699/2000 [13:55<02:23,  2.10it/s]

Training_loss 5.95581


 85%|████████▌ | 1700/2000 [13:55<02:20,  2.13it/s]

Training_loss 5.95194


 85%|████████▌ | 1701/2000 [13:56<02:27,  2.03it/s]

Training_loss 5.95228


 85%|████████▌ | 1702/2000 [13:56<02:30,  1.98it/s]

Training_loss 5.95050


 85%|████████▌ | 1703/2000 [13:57<02:26,  2.02it/s]

Training_loss 5.95028


 85%|████████▌ | 1704/2000 [13:57<02:30,  1.96it/s]

Training_loss 5.94566


 85%|████████▌ | 1705/2000 [13:58<02:35,  1.90it/s]

Training_loss 5.94605


 85%|████████▌ | 1706/2000 [13:58<02:30,  1.95it/s]

Training_loss 5.94679


 85%|████████▌ | 1707/2000 [13:59<02:30,  1.95it/s]

Training_loss 5.94668


 85%|████████▌ | 1708/2000 [13:59<02:30,  1.95it/s]

Training_loss 5.94580


 85%|████████▌ | 1709/2000 [14:00<02:30,  1.93it/s]

Training_loss 5.94556


 86%|████████▌ | 1710/2000 [14:00<02:26,  1.98it/s]

Training_loss 5.94414


 86%|████████▌ | 1711/2000 [14:01<02:29,  1.94it/s]

Training_loss 5.93868


 86%|████████▌ | 1712/2000 [14:01<02:30,  1.92it/s]

Training_loss 5.93543


 86%|████████▌ | 1713/2000 [14:02<02:32,  1.88it/s]

Training_loss 5.92876


 86%|████████▌ | 1714/2000 [14:02<02:31,  1.89it/s]

Training_loss 5.92941


 86%|████████▌ | 1715/2000 [14:03<02:33,  1.85it/s]

Training_loss 5.92611


 86%|████████▌ | 1716/2000 [14:03<02:31,  1.88it/s]

Training_loss 5.92619


 86%|████████▌ | 1717/2000 [14:04<02:31,  1.87it/s]

Training_loss 5.92304


 86%|████████▌ | 1718/2000 [14:04<02:28,  1.90it/s]

Training_loss 5.92329


 86%|████████▌ | 1719/2000 [14:05<02:26,  1.92it/s]

Training_loss 5.92346


 86%|████████▌ | 1720/2000 [14:05<02:24,  1.94it/s]

Training_loss 5.92201


 86%|████████▌ | 1721/2000 [14:06<02:18,  2.01it/s]

Training_loss 5.91701


 86%|████████▌ | 1722/2000 [14:06<02:17,  2.02it/s]

Training_loss 5.91844


 86%|████████▌ | 1723/2000 [14:07<02:11,  2.11it/s]

Training_loss 5.92035


 86%|████████▌ | 1724/2000 [14:07<02:08,  2.14it/s]

Training_loss 5.91804


 86%|████████▋ | 1725/2000 [14:08<02:07,  2.15it/s]

Training_loss 5.91229


 86%|████████▋ | 1726/2000 [14:08<02:04,  2.19it/s]

Training_loss 5.90481


 86%|████████▋ | 1727/2000 [14:09<02:09,  2.11it/s]

Training_loss 5.90490


 86%|████████▋ | 1728/2000 [14:09<02:09,  2.10it/s]

Training_loss 5.89954


 86%|████████▋ | 1729/2000 [14:09<01:51,  2.43it/s]

Training_loss 5.89619


 86%|████████▋ | 1730/2000 [14:10<01:46,  2.54it/s]

Training_loss 5.88925


 87%|████████▋ | 1731/2000 [14:10<01:48,  2.48it/s]

Training_loss 5.88672


 87%|████████▋ | 1732/2000 [14:11<01:58,  2.27it/s]

Training_loss 5.88587


 87%|████████▋ | 1733/2000 [14:11<02:00,  2.22it/s]

Training_loss 5.88496


 87%|████████▋ | 1734/2000 [14:12<02:02,  2.17it/s]

Training_loss 5.88170


 87%|████████▋ | 1735/2000 [14:12<02:04,  2.12it/s]

Training_loss 5.88028


 87%|████████▋ | 1736/2000 [14:13<02:03,  2.14it/s]

Training_loss 5.87868


 87%|████████▋ | 1737/2000 [14:13<02:03,  2.13it/s]

Training_loss 5.87800


 87%|████████▋ | 1738/2000 [14:14<02:07,  2.05it/s]

Training_loss 5.87287


 87%|████████▋ | 1739/2000 [14:14<02:10,  2.00it/s]

Training_loss 5.87237


 87%|████████▋ | 1740/2000 [14:15<02:16,  1.90it/s]

Training_loss 5.87073


 87%|████████▋ | 1741/2000 [14:15<02:17,  1.88it/s]

Training_loss 5.87065


 87%|████████▋ | 1742/2000 [14:16<02:17,  1.88it/s]

Training_loss 5.87236


 87%|████████▋ | 1743/2000 [14:16<02:12,  1.95it/s]

Training_loss 5.86865


 87%|████████▋ | 1744/2000 [14:17<02:13,  1.92it/s]

Training_loss 5.86291


 87%|████████▋ | 1745/2000 [14:17<02:11,  1.94it/s]

Training_loss 5.85578


 87%|████████▋ | 1746/2000 [14:18<02:10,  1.94it/s]

Training_loss 5.84746


 87%|████████▋ | 1747/2000 [14:18<02:10,  1.93it/s]

Training_loss 5.84583


 87%|████████▋ | 1748/2000 [14:19<02:10,  1.93it/s]

Training_loss 5.83999


 87%|████████▋ | 1749/2000 [14:19<02:04,  2.02it/s]

Training_loss 5.83291


 88%|████████▊ | 1750/2000 [14:20<02:04,  2.00it/s]

Training_loss 5.83123


 88%|████████▊ | 1751/2000 [14:20<02:02,  2.03it/s]

Training_loss 5.82235


 88%|████████▊ | 1752/2000 [14:21<02:06,  1.96it/s]

Training_loss 5.81945


 88%|████████▊ | 1753/2000 [14:21<02:04,  1.99it/s]

Training_loss 5.81743


 88%|████████▊ | 1754/2000 [14:22<02:01,  2.03it/s]

Training_loss 5.82140


 88%|████████▊ | 1755/2000 [14:22<02:00,  2.03it/s]

Training_loss 5.82132


 88%|████████▊ | 1756/2000 [14:23<01:56,  2.09it/s]

Training_loss 5.82022


 88%|████████▊ | 1757/2000 [14:23<01:54,  2.12it/s]

Training_loss 5.82059


 88%|████████▊ | 1758/2000 [14:24<01:52,  2.15it/s]

Training_loss 5.81799


 88%|████████▊ | 1759/2000 [14:24<01:47,  2.25it/s]

Training_loss 5.81527


 88%|████████▊ | 1760/2000 [14:24<01:34,  2.53it/s]

Training_loss 5.81466


 88%|████████▊ | 1761/2000 [14:25<01:44,  2.29it/s]

Training_loss 5.80845


 88%|████████▊ | 1762/2000 [14:25<01:40,  2.37it/s]

Training_loss 5.80681


 88%|████████▊ | 1763/2000 [14:26<01:34,  2.51it/s]

Training_loss 5.80662


 88%|████████▊ | 1764/2000 [14:26<01:33,  2.54it/s]

Training_loss 5.80608


 88%|████████▊ | 1765/2000 [14:27<01:41,  2.31it/s]

Training_loss 5.80412


 88%|████████▊ | 1766/2000 [14:27<01:46,  2.20it/s]

Training_loss 5.80323


 88%|████████▊ | 1767/2000 [14:28<01:48,  2.15it/s]

Training_loss 5.80541


 88%|████████▊ | 1768/2000 [14:28<01:47,  2.17it/s]

Training_loss 5.79963


 88%|████████▊ | 1769/2000 [14:28<01:48,  2.13it/s]

Training_loss 5.79616


 88%|████████▊ | 1770/2000 [14:29<01:48,  2.11it/s]

Training_loss 5.79183


 89%|████████▊ | 1771/2000 [14:29<01:48,  2.11it/s]

Training_loss 5.78950


 89%|████████▊ | 1772/2000 [14:30<01:47,  2.13it/s]

Training_loss 5.78915


 89%|████████▊ | 1773/2000 [14:30<01:50,  2.05it/s]

Training_loss 5.78575


 89%|████████▊ | 1774/2000 [14:31<01:50,  2.05it/s]

Training_loss 5.78446


 89%|████████▉ | 1775/2000 [14:31<01:50,  2.04it/s]

Training_loss 5.78004


 89%|████████▉ | 1776/2000 [14:32<01:49,  2.04it/s]

Training_loss 5.77849


 89%|████████▉ | 1777/2000 [14:32<01:48,  2.05it/s]

Training_loss 5.77971


 89%|████████▉ | 1778/2000 [14:33<01:52,  1.97it/s]

Training_loss 5.78097


 89%|████████▉ | 1779/2000 [14:33<01:53,  1.95it/s]

Training_loss 5.77801


 89%|████████▉ | 1780/2000 [14:34<01:47,  2.04it/s]

Training_loss 5.77495


 89%|████████▉ | 1781/2000 [14:34<01:48,  2.01it/s]

Training_loss 5.77556


 89%|████████▉ | 1782/2000 [14:35<01:44,  2.09it/s]

Training_loss 5.77894


 89%|████████▉ | 1783/2000 [14:35<01:44,  2.08it/s]

Training_loss 5.78074


 89%|████████▉ | 1784/2000 [14:36<01:50,  1.95it/s]

Training_loss 5.78238


 89%|████████▉ | 1785/2000 [14:36<01:54,  1.88it/s]

Training_loss 5.77777


 89%|████████▉ | 1786/2000 [14:37<01:56,  1.84it/s]

Training_loss 5.77544


 89%|████████▉ | 1787/2000 [14:38<01:52,  1.89it/s]

Training_loss 5.77252


 89%|████████▉ | 1788/2000 [14:38<01:51,  1.90it/s]

Training_loss 5.76687


 89%|████████▉ | 1789/2000 [14:39<01:50,  1.92it/s]

Training_loss 5.76638


 90%|████████▉ | 1790/2000 [14:39<01:50,  1.91it/s]

Training_loss 5.76211


 90%|████████▉ | 1791/2000 [14:39<01:42,  2.04it/s]

Training_loss 5.76019


 90%|████████▉ | 1792/2000 [14:40<01:44,  2.00it/s]

Training_loss 5.76025


 90%|████████▉ | 1793/2000 [14:40<01:32,  2.25it/s]

Training_loss 5.75892


 90%|████████▉ | 1794/2000 [14:41<01:34,  2.19it/s]

Training_loss 5.75326


 90%|████████▉ | 1795/2000 [14:41<01:28,  2.32it/s]

Training_loss 5.75147


 90%|████████▉ | 1796/2000 [14:42<01:23,  2.45it/s]

Training_loss 5.75321


 90%|████████▉ | 1797/2000 [14:42<01:29,  2.26it/s]

Training_loss 5.75245


 90%|████████▉ | 1798/2000 [14:43<01:31,  2.22it/s]

Training_loss 5.75455


 90%|████████▉ | 1799/2000 [14:43<01:32,  2.16it/s]

Training_loss 5.75539


 90%|█████████ | 1800/2000 [14:44<01:35,  2.10it/s]

Training_loss 5.75295


 90%|█████████ | 1801/2000 [14:44<01:33,  2.12it/s]

Training_loss 5.75354


 90%|█████████ | 1802/2000 [14:45<01:38,  2.00it/s]

Training_loss 5.75302


 90%|█████████ | 1803/2000 [14:45<01:42,  1.92it/s]

Training_loss 5.75574


 90%|█████████ | 1804/2000 [14:46<01:38,  1.99it/s]

Training_loss 5.75199


 90%|█████████ | 1805/2000 [14:46<01:34,  2.05it/s]

Training_loss 5.75074


 90%|█████████ | 1806/2000 [14:47<01:33,  2.07it/s]

Training_loss 5.74407


 90%|█████████ | 1807/2000 [14:47<01:32,  2.08it/s]

Training_loss 5.74039


 90%|█████████ | 1808/2000 [14:47<01:32,  2.09it/s]

Training_loss 5.73756


 90%|█████████ | 1809/2000 [14:48<01:30,  2.10it/s]

Training_loss 5.74268


 90%|█████████ | 1810/2000 [14:48<01:32,  2.05it/s]

Training_loss 5.73996


 91%|█████████ | 1811/2000 [14:49<01:33,  2.02it/s]

Training_loss 5.74236


 91%|█████████ | 1812/2000 [14:49<01:34,  2.00it/s]

Training_loss 5.74476


 91%|█████████ | 1813/2000 [14:50<01:33,  1.99it/s]

Training_loss 5.74192


 91%|█████████ | 1814/2000 [14:50<01:33,  1.99it/s]

Training_loss 5.73518


 91%|█████████ | 1815/2000 [14:51<01:34,  1.96it/s]

Training_loss 5.72733


 91%|█████████ | 1816/2000 [14:52<01:33,  1.97it/s]

Training_loss 5.72220


 91%|█████████ | 1817/2000 [14:52<01:32,  1.99it/s]

Training_loss 5.72270


 91%|█████████ | 1818/2000 [14:53<01:31,  2.00it/s]

Training_loss 5.72280


 91%|█████████ | 1819/2000 [14:53<01:28,  2.04it/s]

Training_loss 5.71799


 91%|█████████ | 1820/2000 [14:53<01:29,  2.02it/s]

Training_loss 5.71590


 91%|█████████ | 1821/2000 [14:54<01:27,  2.05it/s]

Training_loss 5.71428


 91%|█████████ | 1822/2000 [14:54<01:27,  2.03it/s]

Training_loss 5.71387


 91%|█████████ | 1823/2000 [14:55<01:30,  1.95it/s]

Training_loss 5.70967


 91%|█████████ | 1824/2000 [14:56<01:30,  1.94it/s]

Training_loss 5.70242


 91%|█████████▏| 1825/2000 [14:56<01:28,  1.97it/s]

Training_loss 5.69999


 91%|█████████▏| 1826/2000 [14:56<01:18,  2.22it/s]

Training_loss 5.69590


 91%|█████████▏| 1827/2000 [14:57<01:11,  2.42it/s]

Training_loss 5.69822


 91%|█████████▏| 1828/2000 [14:57<01:16,  2.26it/s]

Training_loss 5.69450


 91%|█████████▏| 1829/2000 [14:58<01:13,  2.34it/s]

Training_loss 5.69198


 92%|█████████▏| 1830/2000 [14:58<01:14,  2.27it/s]

Training_loss 5.69029


 92%|█████████▏| 1831/2000 [14:59<01:17,  2.18it/s]

Training_loss 5.69154


 92%|█████████▏| 1832/2000 [14:59<01:17,  2.17it/s]

Training_loss 5.68559


 92%|█████████▏| 1833/2000 [15:00<01:18,  2.13it/s]

Training_loss 5.68091


 92%|█████████▏| 1834/2000 [15:00<01:20,  2.05it/s]

Training_loss 5.67699


 92%|█████████▏| 1835/2000 [15:01<01:22,  2.01it/s]

Training_loss 5.67494


 92%|█████████▏| 1836/2000 [15:01<01:20,  2.04it/s]

Training_loss 5.67422


 92%|█████████▏| 1837/2000 [15:02<01:18,  2.07it/s]

Training_loss 5.67143


 92%|█████████▏| 1838/2000 [15:02<01:19,  2.03it/s]

Training_loss 5.67116


 92%|█████████▏| 1839/2000 [15:03<01:19,  2.02it/s]

Training_loss 5.66829


 92%|█████████▏| 1840/2000 [15:03<01:20,  1.99it/s]

Training_loss 5.66393


 92%|█████████▏| 1841/2000 [15:04<01:19,  2.00it/s]

Training_loss 5.66196


 92%|█████████▏| 1842/2000 [15:04<01:19,  1.99it/s]

Training_loss 5.65833


 92%|█████████▏| 1843/2000 [15:05<01:17,  2.03it/s]

Training_loss 5.65658


 92%|█████████▏| 1844/2000 [15:05<01:17,  2.01it/s]

Training_loss 5.65361


 92%|█████████▏| 1845/2000 [15:06<01:19,  1.95it/s]

Training_loss 5.65238


 92%|█████████▏| 1846/2000 [15:06<01:19,  1.93it/s]

Training_loss 5.65192


 92%|█████████▏| 1847/2000 [15:07<01:17,  1.98it/s]

Training_loss 5.64960


 92%|█████████▏| 1848/2000 [15:07<01:17,  1.96it/s]

Training_loss 5.64676


 92%|█████████▏| 1849/2000 [15:08<01:19,  1.90it/s]

Training_loss 5.64363


 92%|█████████▎| 1850/2000 [15:08<01:17,  1.94it/s]

Training_loss 5.64298


 93%|█████████▎| 1851/2000 [15:09<01:16,  1.94it/s]

Training_loss 5.63758


 93%|█████████▎| 1852/2000 [15:09<01:16,  1.94it/s]

Training_loss 5.63969


 93%|█████████▎| 1853/2000 [15:10<01:12,  2.03it/s]

Training_loss 5.64180


 93%|█████████▎| 1854/2000 [15:10<01:11,  2.05it/s]

Training_loss 5.64115


 93%|█████████▎| 1855/2000 [15:11<01:09,  2.07it/s]

Training_loss 5.63758


 93%|█████████▎| 1856/2000 [15:11<01:09,  2.07it/s]

Training_loss 5.63693


 93%|█████████▎| 1857/2000 [15:12<01:08,  2.10it/s]

Training_loss 5.63763


 93%|█████████▎| 1858/2000 [15:12<01:07,  2.09it/s]

Training_loss 5.63390


 93%|█████████▎| 1859/2000 [15:12<01:06,  2.13it/s]

Training_loss 5.63466


 93%|█████████▎| 1860/2000 [15:13<01:04,  2.17it/s]

Training_loss 5.63476


 93%|█████████▎| 1861/2000 [15:13<00:59,  2.34it/s]

Training_loss 5.63929


 93%|█████████▎| 1862/2000 [15:14<00:54,  2.53it/s]

Training_loss 5.63852


 93%|█████████▎| 1863/2000 [15:14<00:57,  2.38it/s]

Training_loss 5.63845


 93%|█████████▎| 1864/2000 [15:14<00:57,  2.36it/s]

Training_loss 5.63510


 93%|█████████▎| 1865/2000 [15:15<00:51,  2.61it/s]

Training_loss 5.62874


 93%|█████████▎| 1866/2000 [15:15<00:52,  2.55it/s]

Training_loss 5.62662


 93%|█████████▎| 1867/2000 [15:16<00:55,  2.39it/s]

Training_loss 5.62791


 93%|█████████▎| 1868/2000 [15:16<00:57,  2.29it/s]

Training_loss 5.62291


 93%|█████████▎| 1869/2000 [15:17<00:58,  2.24it/s]

Training_loss 5.61993


 94%|█████████▎| 1870/2000 [15:17<00:59,  2.19it/s]

Training_loss 5.61851


 94%|█████████▎| 1871/2000 [15:18<00:58,  2.19it/s]

Training_loss 5.61897


 94%|█████████▎| 1872/2000 [15:18<00:57,  2.21it/s]

Training_loss 5.61989


 94%|█████████▎| 1873/2000 [15:18<00:58,  2.18it/s]

Training_loss 5.61875


 94%|█████████▎| 1874/2000 [15:19<00:57,  2.18it/s]

Training_loss 5.62042


 94%|█████████▍| 1875/2000 [15:19<00:56,  2.23it/s]

Training_loss 5.61680


 94%|█████████▍| 1876/2000 [15:20<00:56,  2.21it/s]

Training_loss 5.60984


 94%|█████████▍| 1877/2000 [15:20<00:56,  2.17it/s]

Training_loss 5.60776


 94%|█████████▍| 1878/2000 [15:21<00:58,  2.10it/s]

Training_loss 5.60192


 94%|█████████▍| 1879/2000 [15:21<00:59,  2.05it/s]

Training_loss 5.60108


 94%|█████████▍| 1880/2000 [15:22<00:57,  2.09it/s]

Training_loss 5.59467


 94%|█████████▍| 1881/2000 [15:22<00:58,  2.03it/s]

Training_loss 5.59263


 94%|█████████▍| 1882/2000 [15:23<00:57,  2.04it/s]

Training_loss 5.59393


 94%|█████████▍| 1883/2000 [15:23<00:57,  2.02it/s]

Training_loss 5.59158


 94%|█████████▍| 1884/2000 [15:24<00:56,  2.06it/s]

Training_loss 5.58561


 94%|█████████▍| 1885/2000 [15:24<00:55,  2.09it/s]

Training_loss 5.58301


 94%|█████████▍| 1886/2000 [15:25<00:53,  2.14it/s]

Training_loss 5.58337


 94%|█████████▍| 1887/2000 [15:25<00:53,  2.10it/s]

Training_loss 5.57887


 94%|█████████▍| 1888/2000 [15:26<00:52,  2.13it/s]

Training_loss 5.57990


 94%|█████████▍| 1889/2000 [15:26<00:52,  2.13it/s]

Training_loss 5.57948


 94%|█████████▍| 1890/2000 [15:27<00:51,  2.13it/s]

Training_loss 5.58153


 95%|█████████▍| 1891/2000 [15:27<00:51,  2.13it/s]

Training_loss 5.57991


 95%|█████████▍| 1892/2000 [15:27<00:50,  2.13it/s]

Training_loss 5.57545


 95%|█████████▍| 1893/2000 [15:28<00:50,  2.13it/s]

Training_loss 5.57790


 95%|█████████▍| 1894/2000 [15:28<00:46,  2.26it/s]

Training_loss 5.57574


 95%|█████████▍| 1895/2000 [15:29<00:44,  2.38it/s]

Training_loss 5.57425


 95%|█████████▍| 1896/2000 [15:29<00:44,  2.33it/s]

Training_loss 5.57320


 95%|█████████▍| 1897/2000 [15:29<00:40,  2.57it/s]

Training_loss 5.56708


 95%|█████████▍| 1898/2000 [15:30<00:41,  2.49it/s]

Training_loss 5.57074


 95%|█████████▍| 1899/2000 [15:30<00:44,  2.28it/s]

Training_loss 5.57071


 95%|█████████▌| 1900/2000 [15:31<00:44,  2.24it/s]

Training_loss 5.56720


 95%|█████████▌| 1901/2000 [15:31<00:44,  2.25it/s]

Training_loss 5.56406


 95%|█████████▌| 1902/2000 [15:32<00:44,  2.21it/s]

Training_loss 5.55924


 95%|█████████▌| 1903/2000 [15:32<00:45,  2.15it/s]

Training_loss 5.55712


 95%|█████████▌| 1904/2000 [15:33<00:45,  2.13it/s]

Training_loss 5.55328


 95%|█████████▌| 1905/2000 [15:33<00:44,  2.12it/s]

Training_loss 5.54821


 95%|█████████▌| 1906/2000 [15:34<00:43,  2.15it/s]

Training_loss 5.54080


 95%|█████████▌| 1907/2000 [15:34<00:44,  2.10it/s]

Training_loss 5.53965


 95%|█████████▌| 1908/2000 [15:35<00:43,  2.12it/s]

Training_loss 5.54185


 95%|█████████▌| 1909/2000 [15:35<00:43,  2.09it/s]

Training_loss 5.54293


 96%|█████████▌| 1910/2000 [15:35<00:40,  2.23it/s]

Training_loss 5.54159


 96%|█████████▌| 1911/2000 [15:36<00:38,  2.31it/s]

Training_loss 5.54386


 96%|█████████▌| 1912/2000 [15:36<00:35,  2.46it/s]

Training_loss 5.54472


 96%|█████████▌| 1913/2000 [15:37<00:36,  2.39it/s]

Training_loss 5.53900


 96%|█████████▌| 1914/2000 [15:37<00:39,  2.20it/s]

Training_loss 5.53186


 96%|█████████▌| 1915/2000 [15:38<00:39,  2.18it/s]

Training_loss 5.53383


 96%|█████████▌| 1916/2000 [15:38<00:39,  2.13it/s]

Training_loss 5.53266


 96%|█████████▌| 1917/2000 [15:39<00:40,  2.07it/s]

Training_loss 5.52939


 96%|█████████▌| 1918/2000 [15:39<00:39,  2.08it/s]

Training_loss 5.53098


 96%|█████████▌| 1919/2000 [15:40<00:39,  2.05it/s]

Training_loss 5.53282


 96%|█████████▌| 1920/2000 [15:40<00:41,  1.94it/s]

Training_loss 5.52799


 96%|█████████▌| 1921/2000 [15:41<00:40,  1.94it/s]

Training_loss 5.52531


 96%|█████████▌| 1922/2000 [15:41<00:39,  1.96it/s]

Training_loss 5.52194


 96%|█████████▌| 1923/2000 [15:42<00:38,  2.00it/s]

Training_loss 5.52098


 96%|█████████▌| 1924/2000 [15:42<00:37,  2.04it/s]

Training_loss 5.51428


 96%|█████████▋| 1925/2000 [15:43<00:36,  2.05it/s]

Training_loss 5.50848


 96%|█████████▋| 1926/2000 [15:43<00:35,  2.07it/s]

Training_loss 5.50903


 96%|█████████▋| 1927/2000 [15:44<00:35,  2.08it/s]

Training_loss 5.50193


 96%|█████████▋| 1928/2000 [15:44<00:32,  2.20it/s]

Training_loss 5.50009


 96%|█████████▋| 1929/2000 [15:44<00:28,  2.46it/s]

Training_loss 5.49752


 96%|█████████▋| 1930/2000 [15:45<00:28,  2.43it/s]

Training_loss 5.49673


 97%|█████████▋| 1931/2000 [15:45<00:27,  2.47it/s]

Training_loss 5.49555


 97%|█████████▋| 1932/2000 [15:45<00:25,  2.65it/s]

Training_loss 5.49564


 97%|█████████▋| 1933/2000 [15:46<00:27,  2.42it/s]

Training_loss 5.49500


 97%|█████████▋| 1934/2000 [15:46<00:28,  2.30it/s]

Training_loss 5.48979


 97%|█████████▋| 1935/2000 [15:47<00:30,  2.15it/s]

Training_loss 5.48918


 97%|█████████▋| 1936/2000 [15:47<00:30,  2.07it/s]

Training_loss 5.48861


 97%|█████████▋| 1937/2000 [15:48<00:31,  2.03it/s]

Training_loss 5.48470


 97%|█████████▋| 1938/2000 [15:49<00:31,  1.99it/s]

Training_loss 5.48234


 97%|█████████▋| 1939/2000 [15:49<00:30,  2.00it/s]

Training_loss 5.48007


 97%|█████████▋| 1940/2000 [15:50<00:30,  1.99it/s]

Training_loss 5.47863


 97%|█████████▋| 1941/2000 [15:50<00:28,  2.05it/s]

Training_loss 5.47575


 97%|█████████▋| 1942/2000 [15:50<00:28,  2.05it/s]

Training_loss 5.47503


 97%|█████████▋| 1943/2000 [15:51<00:27,  2.07it/s]

Training_loss 5.47446


 97%|█████████▋| 1944/2000 [15:51<00:27,  2.07it/s]

Training_loss 5.47548


 97%|█████████▋| 1945/2000 [15:52<00:26,  2.08it/s]

Training_loss 5.47462


 97%|█████████▋| 1946/2000 [15:52<00:25,  2.09it/s]

Training_loss 5.47174


 97%|█████████▋| 1947/2000 [15:53<00:25,  2.07it/s]

Training_loss 5.47585


 97%|█████████▋| 1948/2000 [15:53<00:25,  2.05it/s]

Training_loss 5.47287


 97%|█████████▋| 1949/2000 [15:54<00:23,  2.14it/s]

Training_loss 5.46731


 98%|█████████▊| 1950/2000 [15:54<00:22,  2.18it/s]

Training_loss 5.47025


 98%|█████████▊| 1951/2000 [15:55<00:22,  2.15it/s]

Training_loss 5.46592


 98%|█████████▊| 1952/2000 [15:55<00:22,  2.11it/s]

Training_loss 5.46590


 98%|█████████▊| 1953/2000 [15:56<00:23,  2.02it/s]

Training_loss 5.46187


 98%|█████████▊| 1954/2000 [15:56<00:23,  1.95it/s]

Training_loss 5.46488


 98%|█████████▊| 1955/2000 [15:57<00:23,  1.89it/s]

Training_loss 5.46646


 98%|█████████▊| 1956/2000 [15:57<00:22,  1.93it/s]

Training_loss 5.46892


 98%|█████████▊| 1957/2000 [15:58<00:21,  2.00it/s]

Training_loss 5.46739


 98%|█████████▊| 1958/2000 [15:58<00:20,  2.02it/s]

Training_loss 5.47455


 98%|█████████▊| 1959/2000 [15:59<00:19,  2.07it/s]

Training_loss 5.46983


 98%|█████████▊| 1960/2000 [15:59<00:19,  2.02it/s]

Training_loss 5.47221


 98%|█████████▊| 1961/2000 [16:00<00:19,  2.05it/s]

Training_loss 5.46615


 98%|█████████▊| 1962/2000 [16:00<00:17,  2.14it/s]

Training_loss 5.46160


 98%|█████████▊| 1963/2000 [16:01<00:16,  2.28it/s]

Training_loss 5.46247


 98%|█████████▊| 1964/2000 [16:01<00:15,  2.34it/s]

Training_loss 5.46442


 98%|█████████▊| 1965/2000 [16:01<00:14,  2.38it/s]

Training_loss 5.46059


 98%|█████████▊| 1966/2000 [16:02<00:12,  2.62it/s]

Training_loss 5.45972


 98%|█████████▊| 1967/2000 [16:02<00:13,  2.48it/s]

Training_loss 5.45862


 98%|█████████▊| 1968/2000 [16:03<00:13,  2.34it/s]

Training_loss 5.45162


 98%|█████████▊| 1969/2000 [16:03<00:13,  2.30it/s]

Training_loss 5.44485


 98%|█████████▊| 1970/2000 [16:04<00:13,  2.21it/s]

Training_loss 5.44132


 99%|█████████▊| 1971/2000 [16:04<00:12,  2.24it/s]

Training_loss 5.43849


 99%|█████████▊| 1972/2000 [16:04<00:12,  2.23it/s]

Training_loss 5.43863


 99%|█████████▊| 1973/2000 [16:05<00:12,  2.12it/s]

Training_loss 5.43293


 99%|█████████▊| 1974/2000 [16:05<00:12,  2.13it/s]

Training_loss 5.43103


 99%|█████████▉| 1975/2000 [16:06<00:11,  2.21it/s]

Training_loss 5.42904


 99%|█████████▉| 1976/2000 [16:06<00:10,  2.19it/s]

Training_loss 5.42933


 99%|█████████▉| 1977/2000 [16:07<00:10,  2.21it/s]

Training_loss 5.42622


 99%|█████████▉| 1978/2000 [16:07<00:10,  2.14it/s]

Training_loss 5.42619


 99%|█████████▉| 1979/2000 [16:08<00:09,  2.13it/s]

Training_loss 5.42517


 99%|█████████▉| 1980/2000 [16:08<00:09,  2.11it/s]

Training_loss 5.42391


 99%|█████████▉| 1981/2000 [16:09<00:09,  2.08it/s]

Training_loss 5.42526


 99%|█████████▉| 1982/2000 [16:09<00:08,  2.10it/s]

Training_loss 5.42304


 99%|█████████▉| 1983/2000 [16:10<00:08,  2.12it/s]

Training_loss 5.42136


 99%|█████████▉| 1984/2000 [16:10<00:07,  2.09it/s]

Training_loss 5.41642


 99%|█████████▉| 1985/2000 [16:11<00:07,  2.11it/s]

Training_loss 5.41312


 99%|█████████▉| 1986/2000 [16:11<00:06,  2.08it/s]

Training_loss 5.41614


 99%|█████████▉| 1987/2000 [16:12<00:06,  2.02it/s]

Training_loss 5.41144


 99%|█████████▉| 1988/2000 [16:12<00:05,  2.10it/s]

Training_loss 5.40661


 99%|█████████▉| 1989/2000 [16:13<00:05,  2.09it/s]

Training_loss 5.40721


100%|█████████▉| 1990/2000 [16:13<00:04,  2.13it/s]

Training_loss 5.40936


100%|█████████▉| 1991/2000 [16:13<00:04,  2.04it/s]

Training_loss 5.41051


100%|█████████▉| 1992/2000 [16:14<00:04,  1.99it/s]

Training_loss 5.41326


100%|█████████▉| 1993/2000 [16:15<00:03,  1.99it/s]

Training_loss 5.41337


100%|█████████▉| 1994/2000 [16:15<00:03,  1.99it/s]

Training_loss 5.40712


100%|█████████▉| 1995/2000 [16:16<00:02,  1.97it/s]

Training_loss 5.40122


100%|█████████▉| 1996/2000 [16:16<00:02,  1.95it/s]

Training_loss 5.40250


100%|█████████▉| 1997/2000 [16:16<00:01,  2.26it/s]

Training_loss 5.40154


100%|█████████▉| 1998/2000 [16:17<00:00,  2.18it/s]

Training_loss 5.39717


100%|█████████▉| 1999/2000 [16:17<00:00,  2.12it/s]

Training_loss 5.39347


100%|██████████| 2000/2000 [16:18<00:00,  2.04it/s]

Training_loss 5.39559





In [21]:
#plot.plot(test_loss)
parameters_to_vector(models[19].parameters())

tensor([ 0.2975,  0.2338,  0.3959,  0.4531, -0.2543, -0.5216, -0.2689,  0.0679,
         0.0881, -0.4987,  0.9672,  0.2327,  0.1979,  0.2809,  0.0547,  0.2902,
        -0.1267, -0.5663,  1.1430, -0.4069,  0.1269, -0.0773,  0.3289, -0.1364,
         0.3826,  0.7262, -0.0390,  0.0828,  1.0114, -0.9897,  0.1473,  0.5010,
        -0.5570,  0.5004,  1.1288,  0.2513,  1.8504,  1.7718,  2.0736,  2.6678],
       grad_fn=<CatBackward0>)

In [22]:
for j in G.neighbors(0):
    print(j)

1
4
14
16
18
19


In [23]:
parameters_to_vector(models[0].parameters())

tensor([ 0.4526, -1.0551,  0.1519, -0.1993, -0.0724, -0.4891,  0.0398,  0.4347,
        -0.5021, -0.2642, -0.0116,  0.4408, -0.2803,  0.0456, -0.3797, -0.1006,
         0.0678,  0.5890,  0.3017, -0.8847,  0.3578, -0.1197,  0.5848,  0.1214,
         0.4657,  0.4148, -0.1520, -1.1701,  0.4323, -0.1848,  0.0093,  0.1662,
        -0.2472,  0.1840,  1.0480,  0.0881,  2.2527,  1.4496,  2.0630,  2.3970],
       grad_fn=<CatBackward0>)

In [24]:
projection_list[0]

[0,
 tensor([[1.9948, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 1.9948, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 1.9948,  ..., 0.0000, 0.0000, 0.0000],
         ...,
         [0.0000, 0.0000, 0.0000,  ..., 1.9948, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 1.9948, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 1.9948]]),
 0,
 0,
 tensor([[1.1173, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 1.1173, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 1.1173,  ..., 0.0000, 0.0000, 0.0000],
         ...,
         [0.0000, 0.0000, 0.0000,  ..., 1.1173, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 1.1173, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 1.1173]]),
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 tensor([[1.1013, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 1.1013, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [

In [25]:
projected_weights[0]

[0,
 tensor([ 0.2548,  0.0658, -0.0045,  0.0911, -0.0738,  0.0299,  0.0360,  0.1597,
          0.0923,  0.2516, -0.0078,  0.5584, -0.6457,  0.6559, -0.0656, -0.6471,
         -0.1306, -0.1466,  0.6234,  0.5314,  0.2881, -0.4003,  0.2266,  0.4650,
          0.3858,  0.4165, -0.1101, -0.1303,  0.5773, -0.4849,  0.2523, -0.5749,
         -0.1249, -0.1640, -0.0380, -0.3655,  0.8216,  0.3899,  0.0842, -0.5236]),
 0,
 0,
 tensor([ 3.5971e-01, -3.1273e-01,  9.5203e-02,  1.8245e-01,  1.4223e-01,
          2.6246e-01, -4.3272e-02, -1.8076e-01, -3.6497e-01, -2.8299e-01,
          1.3521e-01, -1.3890e-01,  3.6521e-02,  1.7641e-01,  3.2532e-01,
         -2.6646e-01,  3.6708e-01,  1.8529e-01, -7.7722e-02,  2.5398e-01,
         -3.6904e-04,  1.2581e-01,  3.6968e-01,  1.1169e-01, -5.0109e-02,
          6.3174e-02,  2.3317e-01,  2.0360e-02, -2.5412e-01, -3.3120e-01,
         -1.1161e-01,  3.6355e-01,  3.1893e-01, -3.2759e-01,  1.6140e-01,
          3.3690e-01, -5.5159e-01,  3.2794e-01,  5.5532e-01,  3

In [26]:
test_loss = np.array(test_loss)
total_rel_error = np.array(total_rel_error)

In [27]:
np.save( 'training_loss_dfedu' + str(eta).replace('.', '_') + '_pout' + str(pout).replace('.', '_'), test_loss)
#np.save('relative_error_dfedu' + str(eta).replace('.', '_'), total_rel_error)