In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Dataset
import copy
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import networkx as nx
from torch.nn.utils import parameters_to_vector, vector_to_parameters

In [2]:
# Graph implementation
def generate_graph(cluster_sizes=[100,100], pin=0.5, pout=0.01, seed=0):
    """Generate a random connected graph"""
    probs = np.array([[pin, pout],[pout, pin]])
    while True:
        g = nx.stochastic_block_model(cluster_sizes, probs)
        if nx.algorithms.components.is_connected(g):
            return g


cluster_sizes = [10, 10]
pin = 0.5
#pout = 0.01
pout = 0.2
seed = 0
alpha = 1e-3
lamda = 1e-3
eta = 1e-3
no_users = sum(cluster_sizes)
batch_size = 50
epochs = 1
d0 = 100 # embedding dimension 
it = 2000
G = generate_graph(cluster_sizes, pin, pout, seed)

#nx.draw(G, with_labels=True, node_size=100, alpha=1, linewidths=10)
#plt.show()

In [3]:
# Metropolis weights 
number_nodes = G.number_of_nodes()
weights = np.zeros([number_nodes, number_nodes])
for edge in G.edges():
  i, j = edge[0], edge[1]
  weights[i - 1][j - 1] = 1 / (1 + np.max([G.degree(i), G.degree(j)]))
  weights[j - 1][i - 1] = weights[i - 1][j - 1]

print(weights)

weights = weights + np.diag(1 - np.sum(weights, axis=0))

metropolis_weights = weights
print(metropolis_weights)


[[0.         0.         0.         0.09090909 0.09090909 0.09090909
  0.09090909 0.09090909 0.         0.09090909 0.         0.
  0.         0.09090909 0.         0.         0.         0.09090909
  0.09090909 0.09090909]
 [0.         0.         0.14285714 0.1        0.125      0.
  0.         0.         0.14285714 0.         0.         0.
  0.14285714 0.         0.         0.         0.         0.
  0.         0.14285714]
 [0.         0.14285714 0.         0.1        0.         0.
  0.14285714 0.1        0.         0.         0.         0.
  0.         0.         0.         0.         0.14285714 0.09090909
  0.         0.        ]
 [0.09090909 0.1        0.1        0.         0.1        0.
  0.1        0.1        0.1        0.         0.         0.
  0.1        0.         0.         0.         0.         0.09090909
  0.         0.        ]
 [0.09090909 0.125      0.         0.1        0.         0.
  0.         0.1        0.         0.         0.         0.125
  0.         0.11111111 0

In [4]:
def degrees(A):
    """Return the degrees of each node of a graph from its adjacency matrix"""
    return np.sum(A, axis=0).reshape(A.shape[0], 1)

def node_degree(n, G):
    cnt = 0
    for i in G.neighbors(n):
        cnt += 1
    return cnt

def get_neighbors(n, G):
    neighbors_list = []
    for i in G.neighbors(n):
        neighbors_list.append(int(i))
    return neighbors_list

In [5]:
# Dataset partitioning
def random_split(X, y, n, seed):
    """Equally split data between n agents"""
    rng = np.random.default_rng(seed)
    perm = rng.permutation(y.size)
    X_split = np.array_split(X[perm], n)  #np.stack to keep as a np array
    y_split = np.array_split(y[perm], n)
    return X_split, y_split





X_train = np.load('X_train.npy')
X_test = np.load('X_test.npy')
y_train = np.load('y_train.npy')
y_test = np.load('y_test.npy')

no_features = X_train.shape[1]

X, y = random_split(X_train, y_train, no_users, 1234)


In [6]:
no_features

561

In [7]:
datapoints = {}
count = 0
W1 = np.array([2.0, 2.0, 3.0, 3.0])
W2 = np.array([-2.0, 2.0, 3.0, -3.0])
W3 = 2 * W1
W4 = 2  * W2
W = [W1, W2]
m = 200
n = 4

scaler = [1.0, -1.0]

noise_sd = 0.001
for i, cluster_size in enumerate(cluster_sizes):
    for j in range(cluster_size):
        features = np.random.normal(loc=0.0, scale=1.0, size=(m, n))
        label = np.dot(features, W[i ]) + np.random.normal(0,noise_sd)
        data = X[count]
        data[:, 0:4] *= scaler[i]
        datapoints[count] = {
                'features': data,
                'degree': node_degree(count, G),
                'label': y[count],
                'neighbors': get_neighbors(count, G),
                'exact_weights': torch.from_numpy(W[i])
            }
        count += 1

In [8]:
class MyDataset(Dataset):
    def __init__(self, data, targets, transform=None):
        self.data = torch.FloatTensor(data)
        self.targets = torch.FloatTensor(targets).unsqueeze(-1)
        
    def __getitem__(self, index):
        x = self.data[index]
        y = self.targets[index]

        return x, y
    
    def __len__(self):
        return len(self.data)


In [9]:
class MLP_Net(nn.Module):
    def __init__(self, user_id):
        super(MLP_Net, self).__init__()
        self.fc1 = nn.Linear(9, 4, bias=False)
        self.fc2 = nn.Linear(4, 1, bias=False)
        #self.fc3 = nn.Linear(200, 10)
        self.user_id = user_id

    def forward(self, x):
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        output = self.fc2(x)
        #output = self.fc3(x)
        return output

In [10]:
from typing import Iterable, Optional

def grads_to_vector(parameters: Iterable[torch.Tensor]) -> torch.Tensor:
    r"""Convert parameters to one vector

    Args:
        parameters (Iterable[Tensor]): an iterator of Tensors that are the
            parameters of a model.

    Returns:
        The parameters represented by a single vector
    """
    # Flag for the device where the parameter is located
    param_device = None

    vec = []
    for param in parameters:
        # Ensure the parameters are located in the same device
        param_device = param.grad

        vec.append(param_device.view(-1))
    return torch.cat(vec)

In [11]:
model = MLP_Net(user_id=0)

lr = 0.01

dataloader = DataLoader(MyDataset(datapoints[19]["features"], datapoints[19]["label"]), batch_size=100, shuffle=False)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
for i in range(100):
    for (x, y) in dataloader:
        criterion = nn.MSELoss()
        optimizer.zero_grad()
        yhat = model(x)
        print(y.size())
        print(yhat.size())
        loss = criterion(yhat, y)
        
        loss.backward()
        print(i, loss, grads_to_vector(model.parameters()), parameters_to_vector(model.parameters()))
        #optimizer.step()
        new_model = parameters_to_vector(model.parameters()) - lr * grads_to_vector(model.parameters())
        vector_to_parameters(parameters=model.parameters(), vec=new_model)
        #if i % 50 ==0:
            #lr *= 0.9
            

#parameters_to_vector(model.parameters())

torch.Size([100, 1])
torch.Size([100, 1])
0 tensor(51.9353, grad_fn=<MseLossBackward0>) tensor([ 0.2695,  0.2279, -0.7095, -1.2798,  1.0796,  0.9197,  1.2352,  0.9011,
         1.6015, -0.0722,  0.0690,  0.1347, -0.0845, -0.0168, -0.0400, -0.0171,
         0.3727, -0.2183, -1.3384,  1.4679, -0.5565, -0.0982,  0.1593,  0.1134,
         0.2279,  0.6948, -0.2222,  2.1648, -2.3557,  0.7769, -0.0517,  0.1631,
         0.1208,  0.0912, -1.5467,  0.8862, -4.0623, -3.2867, -2.8103, -3.4812]) tensor([-0.2203, -0.0944, -0.1953, -0.2474,  0.0719,  0.3130,  0.1772,  0.0592,
         0.2724, -0.2618, -0.2390,  0.2787, -0.0686,  0.0438, -0.0515, -0.1145,
         0.2534, -0.2669, -0.2271,  0.2670, -0.2364, -0.0458, -0.1374,  0.0580,
         0.1468, -0.1552, -0.0576, -0.2660,  0.2111, -0.2775, -0.0310, -0.0282,
         0.3324, -0.2274,  0.0509, -0.2227, -0.3566, -0.0614, -0.2687,  0.4448],
       grad_fn=<CatBackward0>)
torch.Size([100, 1])
torch.Size([100, 1])
0 tensor(51.0044, grad_fn=<MseLossBac

torch.Size([100, 1])
torch.Size([100, 1])
5 tensor(15.1859, grad_fn=<MseLossBackward0>) tensor([-2.1856,  0.6891,  1.9889, -1.3234,  0.5405,  1.3414,  0.6800, -0.8532,
        -2.9948, -2.1560,  1.0308,  0.5392, -1.7195,  1.4150,  2.2769,  1.4448,
        -2.2547, -0.1021, -0.0616, -0.0530,  0.1591, -0.0490, -0.1045,  0.1494,
        -0.1269, -0.1120,  0.0166, -0.6543, -1.0181,  1.3689, -0.9783, -0.4984,
         1.1365, -0.8767, -1.8995, -1.4237, -2.5421, -1.4264, -0.0799, -0.9659]) tensor([ 0.2420, -0.4209, -0.3831, -0.3708,  0.1794,  0.3622,  0.3069,  0.1845,
         0.7562,  0.2032, -0.6299,  0.7216, -0.3597,  0.1518, -0.0052,  0.0058,
         0.9060, -0.3979, -0.1932,  0.2483, -0.2638, -0.0131, -0.1523,  0.0152,
         0.1314, -0.1805, -0.0148, -1.3359,  1.5635, -0.5641,  0.0154,  0.0323,
         0.0548, -0.1142,  0.6142, -0.2978,  1.0703,  1.3116,  0.2640,  2.1756],
       grad_fn=<CatBackward0>)
torch.Size([100, 1])
torch.Size([100, 1])
5 tensor(14.9211, grad_fn=<MseLossBac

10 tensor(7.1665, grad_fn=<MseLossBackward0>) tensor([ 0.4576, -1.2714,  0.2649, -0.3877,  0.2200,  0.3040,  0.1735, -0.6493,
        -0.4148, -0.3835, -0.4047,  0.5483, -0.4846,  0.2324,  0.3932,  0.1088,
         0.1243,  0.0124, -0.0347, -0.0482,  0.0423, -0.0093, -0.0949,  0.0984,
        -0.1172, -0.1129,  0.1255, -0.7312, -0.3199,  0.6657, -0.1655, -0.8526,
         0.3918, -1.2619, -0.7808, -0.1827,  0.1495,  0.2952, -0.0290, -0.4413]) tensor([ 0.6974, -0.5572, -0.8443, -0.1989,  0.0423,  0.1183,  0.2147,  0.3230,
         1.2056,  0.6015, -0.7742,  0.6344, -0.1621, -0.0604, -0.3382, -0.1913,
         1.1937, -0.3614, -0.1546,  0.2396, -0.3018, -0.0041, -0.1242, -0.0094,
         0.1649, -0.1952, -0.0233, -1.1800,  1.6820, -0.5797,  0.0625,  0.2649,
        -0.1354,  0.1499,  0.6459, -0.3746,  1.6958,  1.6434,  0.2844,  2.2108],
       grad_fn=<CatBackward0>)
torch.Size([100, 1])
torch.Size([100, 1])
10 tensor(7.2661, grad_fn=<MseLossBackward0>) tensor([-0.0107, -0.5359,  0.2942

11 tensor(6.9891, grad_fn=<MseLossBackward0>) tensor([ 0.5772, -1.4279,  0.2957, -0.2908,  0.1144,  0.1830,  0.0725, -0.7303,
        -0.4263, -0.4141, -0.3507,  0.4975, -0.2883,  0.0028,  0.1959, -0.1193,
         0.2630, -0.0948, -0.0260, -0.0564,  0.0407, -0.0031, -0.0923,  0.0877,
        -0.1154, -0.1205,  0.1327, -0.6974, -0.3357,  0.6578, -0.1697, -0.8060,
         0.3458, -1.2157, -0.7530, -0.2451,  0.1294,  0.4074, -0.0402, -0.4801]) tensor([ 7.0327e-01, -5.1829e-01, -8.6965e-01, -1.9621e-01,  4.4932e-02,
         1.0989e-01,  2.2550e-01,  3.3936e-01,  1.2166e+00,  6.2178e-01,
        -7.5569e-01,  6.3843e-01, -1.5796e-01, -6.3732e-02, -3.5829e-01,
        -1.9084e-01,  1.2065e+00, -3.8919e-01, -1.4899e-01,  2.3909e-01,
        -3.0507e-01, -9.6684e-04, -1.2150e-01, -1.3895e-02,  1.6868e-01,
        -1.9669e-01, -2.8267e-02, -1.1445e+00,  1.6936e+00, -5.7502e-01,
         8.0775e-02,  2.8981e-01, -1.6846e-01,  1.8153e-01,  6.3979e-01,
        -3.9376e-01,  1.7079e+00,  1.6627e

16 tensor(6.8012, grad_fn=<MseLossBackward0>) tensor([ 0.0331, -0.5298,  0.2500, -0.1280, -0.0881,  0.3762,  0.0294, -0.4277,
        -0.4302,  0.2296, -0.6676, -0.1598, -0.1972,  0.1847,  0.5029,  0.1880,
         0.2432, -0.1351, -0.2165,  0.1235,  0.0435, -0.1060,  0.0789,  0.0481,
         0.0544, -0.0501,  0.0176, -0.5232, -0.0165, -1.1184, -0.5563,  0.4193,
         0.3788,  0.4743, -0.6903,  0.1659, -0.3958,  0.3930,  0.1591,  0.3043]) tensor([ 0.7217, -0.3283, -0.9580, -0.1794,  0.0558,  0.0925,  0.2756,  0.4073,
         1.2442,  0.7494, -0.6995,  0.6553, -0.1568, -0.0515, -0.4109, -0.1625,
         1.1860, -0.5102, -0.1222,  0.2372, -0.3200,  0.0171, -0.1146, -0.0342,
         0.1818, -0.2027, -0.0576, -0.9830,  1.7600, -0.5437,  0.1763,  0.3807,
        -0.3016,  0.3112,  0.6316, -0.4565,  1.7434,  1.7198,  0.3038,  2.2232],
       grad_fn=<CatBackward0>)
torch.Size([100, 1])
torch.Size([100, 1])
16 tensor(5.9338, grad_fn=<MseLossBackward0>) tensor([-0.0422, -0.2218, -0.0937

torch.Size([100, 1])
torch.Size([100, 1])
22 tensor(6.4795, grad_fn=<MseLossBackward0>) tensor([-0.1241, -0.3840,  0.0285, -0.2219,  0.0702,  0.3472,  0.1788, -0.4810,
        -0.5287, -0.0666, -0.4577,  0.3757, -0.4136,  0.2618,  0.5085,  0.2293,
         0.3132, -0.2704, -0.2193,  0.1301,  0.0492, -0.1158,  0.0971,  0.0382,
         0.0701, -0.0511,  0.0222, -0.4760, -0.0079, -1.0266, -0.5731,  0.5223,
         0.2833,  0.5609, -0.7448,  0.1140, -0.4769,  0.4787,  0.1144,  0.2296]) tensor([ 0.7611, -0.1555, -1.0056, -0.1343,  0.0544,  0.0852,  0.2986,  0.4978,
         1.2571,  0.8861, -0.6493,  0.6619, -0.1751, -0.0145, -0.4188, -0.1071,
         1.1087, -0.6577, -0.0888,  0.2297, -0.3378,  0.0381, -0.1115, -0.0480,
         0.1924, -0.2123, -0.1000, -0.8227,  1.8313, -0.4758,  0.2756,  0.4370,
        -0.3930,  0.4084,  0.6416, -0.5114,  1.7840,  1.7610,  0.3302,  2.2549],
       grad_fn=<CatBackward0>)
torch.Size([100, 1])
torch.Size([100, 1])
22 tensor(5.4523, grad_fn=<MseLossBac

torch.Size([100, 1])
torch.Size([100, 1])
28 tensor(5.0201, grad_fn=<MseLossBackward0>) tensor([ 0.6453, -1.3853,  0.3714, -0.2107, -0.1904, -0.0393, -0.2185, -0.4563,
        -0.5207,  0.1400, -0.5323,  0.2674,  0.2196, -0.5126, -0.1713, -0.5452,
         0.8029,  0.3913, -0.0348, -0.0086,  0.0773,  0.0706, -0.1559, -0.0270,
        -0.1647, -0.0614,  0.2818, -0.3405, -0.3483,  0.5090,  0.4756, -1.0639,
        -0.4257, -1.3066, -0.5415, -0.1717, -0.4163,  0.6467, -0.1673, -0.7084]) tensor([ 0.8277, -0.0493, -0.9900, -0.0998,  0.0495,  0.0677,  0.3125,  0.6114,
         1.2665,  0.9943, -0.5938,  0.6314, -0.1686,  0.0145, -0.4323, -0.0619,
         0.9880, -0.8311, -0.0573,  0.2196, -0.3578,  0.0618, -0.1115, -0.0564,
         0.1987, -0.2319, -0.1387, -0.6851,  1.8957, -0.4070,  0.3653,  0.4554,
        -0.4404,  0.4649,  0.6406, -0.5511,  1.8328,  1.7864,  0.3695,  2.2830],
       grad_fn=<CatBackward0>)
torch.Size([100, 1])
torch.Size([100, 1])
28 tensor(6.1427, grad_fn=<MseLossBac

34 tensor(5.1220, grad_fn=<MseLossBackward0>) tensor([-0.5147,  0.2463, -0.0703,  0.5482, -0.8756, -0.2771, -0.8196,  0.2044,
        -0.0857, -0.5321,  0.6974, -0.2373,  0.0961, -0.3229,  0.5639, -0.0128,
         0.7665,  0.2091, -0.0667, -0.0074,  0.1032, -0.0129, -0.1621, -0.1634,
        -0.1371,  0.1273,  0.0619, -0.2567, -0.1748,  0.2585,  0.0905, -0.8619,
        -1.0725, -0.6390,  0.3058,  0.3106, -0.3527, -0.4912, -0.2026, -0.1822]) tensor([ 0.8830,  0.0536, -0.9745, -0.0419,  0.0336,  0.0387,  0.3126,  0.6803,
         1.2920,  1.0750, -0.5310,  0.5965, -0.1628,  0.0358, -0.4279, -0.0210,
         0.8714, -0.9696, -0.0317,  0.2121, -0.3771,  0.0810, -0.1088, -0.0580,
         0.2076, -0.2569, -0.1699, -0.5517,  1.9397, -0.3422,  0.4370,  0.4535,
        -0.4798,  0.4991,  0.6508, -0.5663,  1.8822,  1.8051,  0.4146,  2.2962],
       grad_fn=<CatBackward0>)
torch.Size([100, 1])
torch.Size([100, 1])
34 tensor(5.0937, grad_fn=<MseLossBackward0>) tensor([ 0.4853, -0.6277,  0.2732

40 tensor(5.2158, grad_fn=<MseLossBackward0>) tensor([-9.9064e-01,  9.3381e-01, -4.9764e-01, -4.9960e-01,  2.9990e-01,
         5.7985e-01,  3.8532e-01, -3.4309e-01,  2.8480e+00, -1.0102e+00,
         8.1407e-01,  8.7621e-01,  4.1710e-02, -1.7572e-01,  1.3617e-01,
        -1.8834e-01,  2.2651e-01,  5.3816e-04, -2.2178e-01,  3.0315e-01,
        -1.8048e-02,  8.8595e-02, -3.1223e-02, -9.5964e-02, -3.8074e-02,
         9.7352e-02,  4.2618e-01, -2.4358e+00,  2.5057e+00,  7.0725e-01,
         1.0753e+00, -7.6290e-01, -1.0372e+00, -8.5592e-01,  5.7615e-01,
         2.2241e+00,  1.7342e+00, -4.8145e-01, -5.0863e-02,  2.2790e+00]) tensor([ 0.9336,  0.1493, -0.9575, -0.0054,  0.0329,  0.0230,  0.3337,  0.7389,
         1.3284,  1.1169, -0.4503,  0.5827, -0.1598,  0.0403, -0.4233,  0.0044,
         0.7606, -1.1229, -0.0180,  0.2170, -0.4005,  0.1020, -0.1064, -0.0571,
         0.2177, -0.2892, -0.1935, -0.4590,  2.0132, -0.2898,  0.4963,  0.4496,
        -0.5000,  0.5284,  0.6604, -0.5630,  1.94

torch.Size([100, 1])
torch.Size([100, 1])
46 tensor(4.7482, grad_fn=<MseLossBackward0>) tensor([-0.6986,  0.4326, -0.1500,  0.7680, -1.0333, -0.4205, -1.0044,  0.1263,
        -0.3057, -0.6465,  0.8212, -0.1837,  0.2332, -0.4203,  0.5430, -0.1401,
         0.7402, -0.0212, -0.0385, -0.0217,  0.1114, -0.0389, -0.1694, -0.2353,
        -0.1431,  0.1364,  0.0549, -0.1013, -0.1849,  0.2824,  0.1189, -0.8126,
        -1.2844, -0.6014,  0.2620,  0.2861, -0.5692, -0.5070, -0.2152, -0.1834]) tensor([ 1.0021,  0.2146, -0.9196,  0.0334,  0.0287, -0.0032,  0.3517,  0.8009,
         1.3020,  1.1576, -0.3994,  0.5400, -0.1579,  0.0483, -0.4211,  0.0330,
         0.6477, -1.2788, -0.0122,  0.2256, -0.4224,  0.1214, -0.1065, -0.0516,
         0.2271, -0.3223, -0.2240, -0.3390,  2.0262, -0.2653,  0.5183,  0.4538,
        -0.4918,  0.5626,  0.6502, -0.5943,  1.9712,  1.8924,  0.5333,  2.3373],
       grad_fn=<CatBackward0>)
torch.Size([100, 1])
torch.Size([100, 1])
46 tensor(4.7693, grad_fn=<MseLossBac

         1.7131,  1.6799,  1.3739, -4.6996, -2.0891,  0.8842, -0.1896, -0.2956]) tensor([ 1.0502,  0.2783, -0.8703,  0.0421,  0.0385, -0.0130,  0.3848,  0.8579,
         1.2936,  1.1678, -0.3466,  0.5284, -0.1833,  0.0666, -0.4213,  0.0721,
         0.5603, -1.3933, -0.0097,  0.2353, -0.4415,  0.1355, -0.1044, -0.0406,
         0.2388, -0.3505, -0.2557, -0.2746,  2.0640, -0.2205,  0.5313,  0.4547,
        -0.4841,  0.5852,  0.6706, -0.6239,  2.0004,  1.9380,  0.5876,  2.3677],
       grad_fn=<CatBackward0>)
torch.Size([100, 1])
torch.Size([100, 1])
52 tensor(5.0281, grad_fn=<MseLossBackward0>) tensor([-0.7417,  0.7470, -0.6909, -0.4449,  0.2567,  0.4847,  0.3567, -0.4021,
         2.9159, -0.8987,  0.7905,  0.7893, -0.0143, -0.1498,  0.1609, -0.1524,
         0.1750, -0.2958, -0.2233,  0.3385, -0.0460,  0.1175, -0.0138, -0.1268,
        -0.0234,  0.0775,  0.4891, -2.3599,  2.5188,  0.6520,  1.0790, -0.6472,
        -0.9691, -0.7426,  0.4912,  2.0683,  1.8263, -0.2532, -0.0408,  2.2258]

57 tensor(1.2821, grad_fn=<MseLossBackward0>) tensor([ 0.2379, -0.7792,  0.9995, -1.5076,  1.5547,  0.2255,  1.4566,  1.1387,
        -3.8596, -0.4245,  0.0299,  1.2018, -1.3567,  1.2471, -0.4911,  1.2792,
         0.5400, -0.2354,  0.3312, -0.4821,  0.3707, -0.5564,  0.5355,  0.4890,
         0.5060,  0.3805, -1.3176,  1.2241, -1.7816,  1.3701, -2.0563,  1.9789,
         1.8074,  1.8699,  1.4063, -4.8695, -2.0847,  0.6944, -0.1767, -0.1278]) tensor([ 1.0997,  0.3290, -0.8248,  0.0604,  0.0284, -0.0263,  0.3989,  0.9030,
         1.2854,  1.1774, -0.3012,  0.5119, -0.1920,  0.0570, -0.4194,  0.0899,
         0.4818, -1.4740, -0.0135,  0.2462, -0.4651,  0.1522, -0.1092, -0.0298,
         0.2454, -0.3829, -0.2744, -0.2149,  2.0880, -0.2026,  0.5405,  0.4448,
        -0.4816,  0.5924,  0.6723, -0.6173,  2.0262,  1.9673,  0.6414,  2.3702],
       grad_fn=<CatBackward0>)
torch.Size([100, 1])
torch.Size([100, 1])
58 tensor(4.9557, grad_fn=<MseLossBackward0>) tensor([-0.6760,  0.7191, -0.7145

         1.8378,  1.8981,  1.4314, -4.9543, -2.0537,  0.5720, -0.2265, -0.1276]) tensor([ 1.1374,  0.3859, -0.7951,  0.0795,  0.0322, -0.0474,  0.4203,  0.9465,
         1.2648,  1.2026, -0.2802,  0.4921, -0.2080,  0.0316, -0.4039,  0.0949,
         0.4268, -1.5086, -0.0234,  0.2621, -0.4882,  0.1721, -0.1177, -0.0177,
         0.2500, -0.4150, -0.2822, -0.1704,  2.1092, -0.1824,  0.5424,  0.4362,
        -0.4782,  0.5954,  0.6727, -0.6139,  2.0498,  1.9819,  0.6918,  2.3703],
       grad_fn=<CatBackward0>)
torch.Size([100, 1])
torch.Size([100, 1])
64 tensor(4.8660, grad_fn=<MseLossBackward0>) tensor([-0.6592,  0.6764, -0.9176, -0.5315,  0.1340,  0.5301,  0.4155, -0.4172,
         2.9484, -0.6732,  0.7091,  0.5428, -0.1224, -0.1348,  0.0744, -0.1063,
         0.4190, -0.2780, -0.2716,  0.3841, -0.0317,  0.1942, -0.0584, -0.2059,
        -0.0702,  0.0482,  0.5368, -2.2255,  2.4269,  0.5985,  1.0783, -0.5970,
        -0.9209, -0.6842,  0.3643,  2.0275,  1.8391, -0.0875, -0.0128,  2.0956]

70 tensor(4.5708, grad_fn=<MseLossBackward0>) tensor([-0.9242,  0.6812, -0.3208,  1.2263, -1.3246, -0.6967, -1.3394, -0.1276,
        -0.4819, -0.6886,  0.9953, -0.0934,  0.6502, -0.5581, -0.0040, -0.3173,
         0.1715, -0.3705,  0.0252, -0.0680,  0.1054, -0.0124, -0.2475, -0.3630,
        -0.2179,  0.1161,  0.0133,  0.0838, -0.2235,  0.1842,  0.2719, -0.9102,
        -1.4907, -0.7173,  0.1639,  0.3042, -0.7931, -0.3499, -0.2063, -0.2157]) tensor([ 1.1765,  0.4447, -0.7759,  0.1279,  0.0212, -0.0808,  0.4224,  0.9614,
         1.2554,  1.2316, -0.2741,  0.4617, -0.2159, -0.0089, -0.3698,  0.0827,
         0.3834, -1.5237, -0.0376,  0.2832, -0.5156,  0.1933, -0.1285, -0.0089,
         0.2528, -0.4472, -0.2691, -0.1242,  2.1189, -0.1797,  0.5503,  0.4151,
        -0.4890,  0.5842,  0.6565, -0.5875,  2.0728,  1.9834,  0.7392,  2.3489],
       grad_fn=<CatBackward0>)
torch.Size([100, 1])
torch.Size([100, 1])
70 tensor(4.5597, grad_fn=<MseLossBackward0>) tensor([ 0.5083, -0.7546,  0.1056

75 tensor(1.2820, grad_fn=<MseLossBackward0>) tensor([ 0.2982, -0.8725,  1.0744, -1.6059,  1.6494,  0.2909,  1.5442,  1.2154,
        -4.1184, -0.5245,  0.1744,  1.0097, -1.2032,  1.1326, -0.5664,  1.1644,
         0.4652, -0.2125,  0.4223, -0.6112,  0.4655, -0.6879,  0.6603,  0.6078,
         0.6226,  0.4757, -1.6433,  1.2827, -1.8566,  1.4139, -2.0894,  2.0055,
         1.8463,  1.8912,  1.4450, -4.9916, -1.9784,  0.4316, -0.3679, -0.1131]) tensor([ 1.2105,  0.4978, -0.7499,  0.1304,  0.0469, -0.0972,  0.4625,  0.9963,
         1.2206,  1.2319, -0.2567,  0.4711, -0.2506, -0.0237, -0.3478,  0.0931,
         0.3568, -1.5405, -0.0461,  0.2987, -0.5367,  0.2062, -0.1264,  0.0045,
         0.2667, -0.4677, -0.2705, -0.1012,  2.1368, -0.1417,  0.5354,  0.4243,
        -0.4842,  0.5966,  0.6758, -0.6184,  2.0932,  1.9885,  0.7785,  2.3645],
       grad_fn=<CatBackward0>)
torch.Size([100, 1])
torch.Size([100, 1])
76 tensor(4.7023, grad_fn=<MseLossBackward0>) tensor([-7.5337e-01,  8.4179e-01,

82 tensor(4.5345, grad_fn=<MseLossBackward0>) tensor([-0.9687,  0.7537, -0.3055,  1.1888, -1.2893, -0.6762, -1.3073, -0.0840,
        -0.4439, -0.6036,  0.9630, -0.1199,  0.5906, -0.4878,  0.1067, -0.2306,
         0.1252, -0.3987,  0.0537, -0.0910,  0.0820,  0.0113, -0.2815, -0.4148,
        -0.2470,  0.0960,  0.0230,  0.0296, -0.1994,  0.1396,  0.3841, -0.9988,
        -1.5589, -0.8266,  0.0998,  0.2945, -0.7405, -0.2940, -0.1967, -0.1966]) tensor([ 1.2534,  0.5466, -0.7333,  0.1744,  0.0391, -0.1241,  0.4658,  0.9995,
         1.2082,  1.2511, -0.2580,  0.4505, -0.2597, -0.0548, -0.3200,  0.0821,
         0.3215, -1.5405, -0.0561,  0.3173, -0.5667,  0.2280, -0.1364,  0.0108,
         0.2691, -0.4867, -0.2552, -0.0694,  2.1457, -0.1486,  0.5377,  0.4069,
        -0.5003,  0.5843,  0.6574, -0.5855,  2.1153,  1.9825,  0.8183,  2.3426],
       grad_fn=<CatBackward0>)
torch.Size([100, 1])
torch.Size([100, 1])
82 tensor(4.4769, grad_fn=<MseLossBackward0>) tensor([ 0.4558, -0.6764,  0.1691

85 tensor(4.5392, grad_fn=<MseLossBackward0>) tensor([-0.9702,  0.7619, -0.3038,  1.1953, -1.2885, -0.6818, -1.3078, -0.0861,
        -0.4477, -0.6060,  0.9801, -0.1383,  0.5594, -0.4534,  0.1520, -0.1894,
         0.1196, -0.4064,  0.0548, -0.0928,  0.0821,  0.0139, -0.2864, -0.4235,
        -0.2516,  0.0920,  0.0226,  0.0322, -0.1989,  0.1416,  0.3980, -1.0106,
        -1.5728, -0.8406,  0.0973,  0.2886, -0.7374, -0.2970, -0.1965, -0.1909]) tensor([ 1.2729,  0.5685, -0.7255,  0.1864,  0.0430, -0.1346,  0.4761,  1.0033,
         1.1949,  1.2509, -0.2539,  0.4446, -0.2735, -0.0641, -0.3044,  0.0828,
         0.3097, -1.5469, -0.0605,  0.3257, -0.5781,  0.2366, -0.1391,  0.0157,
         0.2721, -0.4907, -0.2511, -0.0597,  2.1519, -0.1406,  0.5337,  0.4046,
        -0.5045,  0.5826,  0.6581, -0.5831,  2.1242,  1.9817,  0.8333,  2.3402],
       grad_fn=<CatBackward0>)
torch.Size([100, 1])
torch.Size([100, 1])
85 tensor(4.4610, grad_fn=<MseLossBackward0>) tensor([ 0.4332, -0.6495,  0.1733

        -0.5734, -0.6785, -0.5196,  1.4655,  0.1077,  0.0443, -0.2331, -0.6444]) tensor([ 1.2961,  0.5735, -0.7175,  0.1815,  0.0595, -0.1345,  0.4972,  1.0056,
         1.1904,  1.2563, -0.2615,  0.4446, -0.2847, -0.0658, -0.2985,  0.0843,
         0.3002, -1.5471, -0.0641,  0.3322, -0.5864,  0.2421, -0.1382,  0.0234,
         0.2765, -0.4934, -0.2493, -0.0538,  2.1582, -0.1373,  0.5269,  0.4135,
        -0.4904,  0.5899,  0.6582, -0.5828,  2.1373,  1.9845,  0.8449,  2.3405],
       grad_fn=<CatBackward0>)
torch.Size([100, 1])
torch.Size([100, 1])
87 tensor(3.3472, grad_fn=<MseLossBackward0>) tensor([ 0.0628, -0.1633,  0.3214,  0.4284, -0.8731, -0.6332, -0.8537,  1.0705,
         0.3175,  0.7506, -0.7515,  0.2810,  0.5380, -0.8752, -0.2927, -0.8565,
         0.8679,  0.4339, -0.0415,  0.0084,  0.1321,  0.1231, -0.2612, -0.0609,
        -0.2747, -0.0633,  0.4529,  0.1353,  0.0024,  0.4586,  1.3182, -1.6160,
        -0.8342, -1.4970,  0.5119,  0.9844,  0.4140,  0.3855, -0.1987, -0.3399]

89 tensor(4.4474, grad_fn=<MseLossBackward0>) tensor([ 0.4225, -0.6344,  0.1737,  0.9278, -0.4275, -0.6849, -0.9443, -1.1917,
         1.3343,  0.7050, -0.9860, -0.4829,  0.5948, -0.0502, -0.1604, -0.4443,
        -1.3672,  0.1723,  0.2287, -0.2032,  0.2519,  0.1491, -0.0707, -0.1139,
        -0.1352, -0.2833,  0.4414,  0.0315, -0.1609,  0.2423,  0.8435, -0.5788,
        -0.5715, -0.6760, -0.5204,  1.4569,  0.0941,  0.0513, -0.2303, -0.6341]) tensor([ 1.3093,  0.5861, -0.7124,  0.1884,  0.0628, -0.1412,  0.5048,  1.0074,
         1.1817,  1.2555, -0.2600,  0.4446, -0.2894, -0.0728, -0.2918,  0.0831,
         0.2922, -1.5506, -0.0657,  0.3363, -0.5934,  0.2479, -0.1398,  0.0269,
         0.2785, -0.4951, -0.2498, -0.0480,  2.1627, -0.1333,  0.5240,  0.4126,
        -0.4919,  0.5891,  0.6592, -0.5796,  2.1431,  1.9841,  0.8542,  2.3391],
       grad_fn=<CatBackward0>)
torch.Size([100, 1])
torch.Size([100, 1])
89 tensor(3.3447, grad_fn=<MseLossBackward0>) tensor([ 0.0608, -0.1528,  0.3264

         0.2753,  0.8711, -1.6241, -0.0931, -1.0474, -0.1534,  0.2949,  0.0052]) tensor([ 1.3208,  0.6156, -0.7140,  0.1875,  0.0784, -0.1423,  0.5318,  1.0118,
         1.1507,  1.2429, -0.2437,  0.4463, -0.3094, -0.0734, -0.2781,  0.0949,
         0.2876, -1.5559, -0.0706,  0.3451, -0.6068,  0.2530, -0.1380,  0.0345,
         0.2864, -0.4929, -0.2595, -0.0415,  2.1704, -0.1343,  0.4974,  0.4343,
        -0.4797,  0.6105,  0.6608, -0.6014,  2.1469,  1.9777,  0.8713,  2.3469],
       grad_fn=<CatBackward0>)
torch.Size([100, 1])
torch.Size([100, 1])
92 tensor(3.5694, grad_fn=<MseLossBackward0>) tensor([ 1.0780, -0.8279, -0.0173, -0.8106,  0.7435,  1.1742,  0.7569, -0.1077,
         0.5358,  0.2297, -0.0942,  0.3652, -0.4359,  0.2425,  0.5017,  0.2308,
         0.7802,  0.2194,  0.1539, -0.1298, -0.1304, -0.0905, -0.1384,  0.0956,
        -0.1737,  0.3745,  0.0041,  0.3953, -0.5486, -0.1801, -1.0322,  0.4970,
         1.2720,  0.4445,  0.5592, -0.4416,  0.7262,  0.1772, -0.2414, -0.5077]

        -0.8310, -0.5465,  0.0735,  1.9202,  1.5860, -0.0153,  0.1737,  1.8249]) tensor([ 1.3373,  0.6552, -0.7179,  0.2269,  0.0602, -0.1802,  0.5220,  1.0086,
         1.1697,  1.2468, -0.2435,  0.4477, -0.3047, -0.1079, -0.2655,  0.0731,
         0.2750, -1.5602, -0.0758,  0.3572, -0.6176,  0.2697, -0.1458,  0.0375,
         0.2869, -0.4990, -0.2420, -0.0456,  2.1978, -0.1111,  0.5263,  0.3963,
        -0.5216,  0.5727,  0.6638, -0.5501,  2.1762,  1.9777,  0.8864,  2.3493],
       grad_fn=<CatBackward0>)
torch.Size([100, 1])
torch.Size([100, 1])
97 tensor(4.5344, grad_fn=<MseLossBackward0>) tensor([-0.9407,  0.7391, -0.3601,  1.1983, -1.2465, -0.6388, -1.2700, -0.1560,
        -0.4792, -0.6035,  0.9913, -0.1655,  0.5969, -0.4671,  0.1701, -0.1960,
         0.0573, -0.4593,  0.0487, -0.1026,  0.0969,  0.0957, -0.3948, -0.5426,
        -0.3629,  0.0736, -0.0162,  0.0363, -0.1960,  0.1494,  0.4278, -1.0217,
        -1.5865, -0.8597,  0.0841,  0.2811, -0.7296, -0.2717, -0.1980, -0.1804]

In [12]:
parameters_to_vector(model.parameters())

tensor([ 1.3540,  0.6733, -0.7143,  0.2365,  0.0653, -0.1904,  0.5335,  1.0076,
         1.1570,  1.2463, -0.2433,  0.4486, -0.3119, -0.1190, -0.2571,  0.0692,
         0.2691, -1.5615, -0.0778,  0.3632, -0.6265,  0.2759, -0.1456,  0.0446,
         0.2920, -0.4999, -0.2401, -0.0386,  2.2035, -0.1063,  0.5221,  0.3958,
        -0.5244,  0.5714,  0.6644, -0.5473,  2.1844,  1.9766,  0.8974,  2.3472],
       grad_fn=<CatBackward0>)

In [13]:
class CNN_Net(nn.Module):
    def __init__(self):
        super(CNN_Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5)
        self.pool = nn.MaxPool2d(2,2)
        self.dropout = nn.Dropout(p=0.2)
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.dropout(x)
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        output = self.fc2(x)
        return output

In [14]:
class ClientUpdate(object):
    def __init__(self, dataset, batchSize, alpha, lamda, epochs, projection_list, projected_weights):
        self.train_loader = DataLoader(MyDataset(dataset["features"], dataset["label"]), batch_size=batchSize, shuffle=True)
        #self.learning_rate = learning_rate
        self.epochs = epochs
        self.batchSize = batchSize

    def train(self, model):
        criterion = nn.MSELoss()
        optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, momentum=0.5)

        e_loss = []
        for epoch in range(1, self.epochs+1):
            train_loss = 0
            model.train()
            for i, (data, labels) in zip(range(1), self.train_loader):
                data, labels = data, labels
                optimizer.zero_grad() 
                output = model(data)  
                loss = criterion(output, labels)
                #loss += mu/2 * torch.norm(client_param.data - server_param.data)**2
                loss.backward()
                grads = grads_to_vector(model.parameters())
                #optimizer.step()
                train_loss += loss.item()*data.size(0)
                weights = parameters_to_vector(model.parameters())
                mat_vec_sum = torch.zeros_like(weights)
                for j in G.neighbors(model.user_id):
                    mat_vec_sum = torch.add(mat_vec_sum, torch.matmul(torch.transpose(projection_list[model.user_id][j], 0, 1), 
                                                         projected_weights[j][model.user_id] - projected_weights[model.user_id][j]))
                
                model_update = parameters_to_vector(model.parameters()) - alpha * (grads + lamda * mat_vec_sum)
                
            vector_to_parameters(parameters=model.parameters(), vec=model_update)
                

            train_loss = train_loss/self.batchSize#len(self.train_loader.dataset) 
            e_loss.append(train_loss)

        total_loss = e_loss#sum(e_loss)/len(e_loss)

        return model.state_dict(), total_loss

In [15]:
# Preparing projection matrices
models = [MLP_Net(user_id=i) for i in range(no_users)]
#temp = MLP_Net()
projection_list = []
projected_weights = []

def update_ProjWeight(projection_list, projected_weights, first_run=True):
    #projected_weights = []
    for i in range(no_users):
        neighbors_mat = []
        neighbors_weights = []
        for j in range(no_users):
            if j in G.neighbors(i):
                with torch.no_grad():
                    if first_run == True:
                        row, column = parameters_to_vector(models[j].parameters()).size()[0], parameters_to_vector(models[i].parameters()).size()[0]
                        mat = torch.zeros((row, column))
                        mat.fill_diagonal_(1.0 + 1.0 * float(np.random.randn(1)))
                        neighbors_mat.append(mat)
                        neighbors_weights.append(torch.matmul(mat, parameters_to_vector(models[j].parameters())))
                    else:
                        neighbors_weights.append(torch.matmul(projection_list[j][i], parameters_to_vector(models[j].parameters())))
            else:
                neighbors_mat.append(0)
                neighbors_weights.append(0)
        if first_run == True:
            projection_list.append(neighbors_mat)
        projected_weights.append(neighbors_weights)

update_ProjWeight(projection_list, projected_weights)



In [16]:
print(projection_list[0])

[0, 0, 0, tensor([[1.1254, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 1.1254, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 1.1254,  ..., 0.0000, 0.0000, 0.0000],
        ...,
        [0.0000, 0.0000, 0.0000,  ..., 1.1254, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 1.1254, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 1.1254]]), tensor([[1.1029, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 1.1029, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 1.1029,  ..., 0.0000, 0.0000, 0.0000],
        ...,
        [0.0000, 0.0000, 0.0000,  ..., 1.1029, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 1.1029, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 1.1029]]), 0, 0, 0, tensor([[2.5769, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 2.5769, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 2.5769,  ..., 0.0000, 0.0000, 

In [17]:
def testing(model, dataset, bs, criterion): 
    test_loss = 0
    correct = 0
    test_loader = DataLoader(MyDataset(dataset["features"], dataset["label"]), batch_size=bs)
    l = len(test_loader)
    model.eval()
    for data, labels in test_loader:
        data, labels = data, labels
        output = model(data)
        loss = criterion(output, labels)
        test_loss += loss.item()*data.size(0)
        #_, pred = torch.max(output, 1)
        #correct += pred.eq(labels.data.view_as(pred)).sum().item()
    
    test_loss /= len(test_loader.dataset)
    
    return test_loss

In [18]:
def rel_error(model):
    return (torch.norm(parameters_to_vector(model.parameters()) - datapoints[model.user_id]['exact_weights']) / torch.norm(datapoints[model.user_id]['exact_weights'])).detach()

In [19]:
torch.norm(datapoints[model.user_id]['exact_weights'])

tensor(5.0990, dtype=torch.float64)

In [20]:
model = MLP_Net(user_id=0)

from torch.nn.utils import parameters_to_vector, vector_to_parameters

with torch.no_grad():    
    params = parameters_to_vector(model.parameters())

    print(params)

params *= 2.

vector_to_parameters(parameters=model.parameters(), vec=params)

parameters_to_vector(model.parameters())





tensor([-0.2020,  0.2162, -0.0128, -0.3196, -0.0927,  0.0113,  0.3231,  0.0387,
        -0.1462,  0.1075,  0.2282,  0.3164,  0.1650,  0.1298,  0.1648, -0.0678,
        -0.1029,  0.0018,  0.0480, -0.0531,  0.2296, -0.1860,  0.0809, -0.3191,
         0.1640, -0.0841,  0.0525, -0.2163, -0.0804, -0.0617, -0.0053,  0.0277,
        -0.2629,  0.2722,  0.0442, -0.1403,  0.2044,  0.4307, -0.3566,  0.3585])


tensor([-0.4040,  0.4325, -0.0256, -0.6392, -0.1855,  0.0226,  0.6463,  0.0774,
        -0.2924,  0.2150,  0.4563,  0.6329,  0.3300,  0.2597,  0.3295, -0.1355,
        -0.2058,  0.0035,  0.0960, -0.1062,  0.4592, -0.3720,  0.1618, -0.6382,
         0.3280, -0.1682,  0.1050, -0.4326, -0.1608, -0.1234, -0.0106,  0.0554,
        -0.5258,  0.5444,  0.0884, -0.2806,  0.4088,  0.8614, -0.7132,  0.7169],
       grad_fn=<CatBackward0>)

In [21]:
#global_model = CNN_Net().cuda()
models = [MLP_Net(user_id=i) for i in range(no_users)]
dummy_models = [MLP_Net(user_id=i) for i in range(no_users)]

#model.load_state_dict(global_model.state_dict())

criterion = nn.MSELoss()


train_loss = []
test_loss = []
test_accuracy = []
total_rel_error = []

for curr_round in tqdm(range(1, it+1)):
    w, local_loss = [], []

    
    for i in range(no_users):
        dummy_models[i].load_state_dict(models[i].state_dict())
        local_update = ClientUpdate(dataset=datapoints[i], batchSize=batch_size, alpha=alpha, lamda=lamda, epochs=1, projection_list=projection_list, projected_weights=projected_weights)
        weights, loss = local_update.train(dummy_models[i])
        w.append(weights)
        local_loss.append(loss)
        models[i].load_state_dict(w[i])
        
    
    
    # Update prjection matrix
    
    #print(projection_list[0], projected_weights[0])
    
    for i in range(no_users):
        weights = parameters_to_vector(models[i].parameters())
        for j in G.neighbors(i):
            mat_vec_sum = torch.zeros_like(weights)
            for k in G.neighbors(i):
                mat_vec_sum = torch.add(mat_vec_sum, projected_weights[k][i] - projected_weights[i][k])
            #print(torch.outer(mat_vec_sum, weights))

            projection_list[i][j] = torch.add(projection_list[i][j], -1 * eta * lamda * mat_vec_sum)
                                         
    projected_weights = []                                          
    update_ProjWeight(projection_list, projected_weights, first_run=False)
        
        
        
    
    




          
            

    local_test_acc = []
    local_test_loss = []
    user_rel_error = 0
    for k in range(no_users):
      
        g_loss = testing(models[i], datapoints[i], 50, criterion)
        local_test_loss.append(g_loss)
        #user_rel_error += rel_error(models[i])
    
    
        

    g_loss = sum(local_test_loss) / len(local_test_loss)
    #total_rel_error.append(user_rel_error / no_users)
    
    

    test_loss.append(g_loss)
    #test_accuracy.append(g_accuracy)
    print("Training_loss %2.5f"% (test_loss[-1]))

  0%|          | 1/2000 [00:00<16:09,  2.06it/s]

Training_loss 52.58380


  0%|          | 2/2000 [00:01<17:36,  1.89it/s]

Training_loss 52.50533


  0%|          | 3/2000 [00:01<17:48,  1.87it/s]

Training_loss 52.45320


  0%|          | 4/2000 [00:02<18:46,  1.77it/s]

Training_loss 52.36617


  0%|          | 5/2000 [00:02<19:28,  1.71it/s]

Training_loss 52.27706


  0%|          | 6/2000 [00:03<19:06,  1.74it/s]

Training_loss 52.20379


  0%|          | 7/2000 [00:03<19:26,  1.71it/s]

Training_loss 52.09538


  0%|          | 8/2000 [00:04<18:40,  1.78it/s]

Training_loss 52.00725


  0%|          | 9/2000 [00:05<19:03,  1.74it/s]

Training_loss 51.91009


  0%|          | 10/2000 [00:05<17:54,  1.85it/s]

Training_loss 51.82266


  1%|          | 11/2000 [00:05<14:56,  2.22it/s]

Training_loss 51.74678


  1%|          | 12/2000 [00:06<13:02,  2.54it/s]

Training_loss 51.68568


  1%|          | 13/2000 [00:06<12:05,  2.74it/s]

Training_loss 51.59908


  1%|          | 14/2000 [00:06<13:07,  2.52it/s]

Training_loss 51.53922


  1%|          | 15/2000 [00:07<14:48,  2.23it/s]

Training_loss 51.45575


  1%|          | 16/2000 [00:07<15:48,  2.09it/s]

Training_loss 51.38778


  1%|          | 17/2000 [00:08<16:36,  1.99it/s]

Training_loss 51.28452


  1%|          | 18/2000 [00:09<17:07,  1.93it/s]

Training_loss 51.19776


  1%|          | 19/2000 [00:09<17:30,  1.89it/s]

Training_loss 51.12841


  1%|          | 20/2000 [00:10<17:43,  1.86it/s]

Training_loss 51.05602


  1%|          | 21/2000 [00:10<17:59,  1.83it/s]

Training_loss 50.98012


  1%|          | 22/2000 [00:11<18:48,  1.75it/s]

Training_loss 50.90154


  1%|          | 23/2000 [00:11<19:10,  1.72it/s]

Training_loss 50.78870


  1%|          | 24/2000 [00:12<19:47,  1.66it/s]

Training_loss 50.68604


  1%|▏         | 25/2000 [00:13<19:25,  1.69it/s]

Training_loss 50.61232


  1%|▏         | 26/2000 [00:14<21:38,  1.52it/s]

Training_loss 50.53477


  1%|▏         | 27/2000 [00:14<21:19,  1.54it/s]

Training_loss 50.46807


  1%|▏         | 28/2000 [00:15<20:29,  1.60it/s]

Training_loss 50.35956


  1%|▏         | 29/2000 [00:15<20:07,  1.63it/s]

Training_loss 50.25688


  2%|▏         | 30/2000 [00:16<20:12,  1.63it/s]

Training_loss 50.16532


  2%|▏         | 31/2000 [00:16<19:58,  1.64it/s]

Training_loss 50.09066


  2%|▏         | 32/2000 [00:17<19:58,  1.64it/s]

Training_loss 50.00283


  2%|▏         | 33/2000 [00:18<19:22,  1.69it/s]

Training_loss 49.92338


  2%|▏         | 34/2000 [00:18<19:09,  1.71it/s]

Training_loss 49.82618


  2%|▏         | 35/2000 [00:19<18:32,  1.77it/s]

Training_loss 49.71878


  2%|▏         | 36/2000 [00:19<18:26,  1.77it/s]

Training_loss 49.64970


  2%|▏         | 37/2000 [00:20<18:39,  1.75it/s]

Training_loss 49.54199


  2%|▏         | 38/2000 [00:20<18:39,  1.75it/s]

Training_loss 49.43936


  2%|▏         | 39/2000 [00:21<17:50,  1.83it/s]

Training_loss 49.32995


  2%|▏         | 40/2000 [00:21<15:08,  2.16it/s]

Training_loss 49.25017


  2%|▏         | 41/2000 [00:21<13:12,  2.47it/s]

Training_loss 49.15549


  2%|▏         | 42/2000 [00:22<12:04,  2.70it/s]

Training_loss 49.04892


  2%|▏         | 43/2000 [00:22<12:25,  2.62it/s]

Training_loss 48.97615


  2%|▏         | 44/2000 [00:23<14:09,  2.30it/s]

Training_loss 48.88836


  2%|▏         | 45/2000 [00:23<15:03,  2.16it/s]

Training_loss 48.77231


  2%|▏         | 46/2000 [00:24<15:54,  2.05it/s]

Training_loss 48.65982


  2%|▏         | 47/2000 [00:24<16:46,  1.94it/s]

Training_loss 48.58655


  2%|▏         | 48/2000 [00:25<17:10,  1.89it/s]

Training_loss 48.46437


  2%|▏         | 49/2000 [00:26<17:39,  1.84it/s]

Training_loss 48.35998


  2%|▎         | 50/2000 [00:26<17:58,  1.81it/s]

Training_loss 48.26709


  3%|▎         | 51/2000 [00:27<17:12,  1.89it/s]

Training_loss 48.15140


  3%|▎         | 52/2000 [00:27<17:11,  1.89it/s]

Training_loss 48.00655


  3%|▎         | 53/2000 [00:28<18:13,  1.78it/s]

Training_loss 47.89637


  3%|▎         | 54/2000 [00:28<18:44,  1.73it/s]

Training_loss 47.76566


  3%|▎         | 55/2000 [00:29<18:54,  1.71it/s]

Training_loss 47.68041


  3%|▎         | 56/2000 [00:30<18:46,  1.73it/s]

Training_loss 47.56578


  3%|▎         | 57/2000 [00:30<18:55,  1.71it/s]

Training_loss 47.45747


  3%|▎         | 58/2000 [00:31<18:39,  1.73it/s]

Training_loss 47.36883


  3%|▎         | 59/2000 [00:31<18:42,  1.73it/s]

Training_loss 47.28379


  3%|▎         | 60/2000 [00:32<19:04,  1.70it/s]

Training_loss 47.15496


  3%|▎         | 61/2000 [00:32<18:32,  1.74it/s]

Training_loss 47.05976


  3%|▎         | 62/2000 [00:33<18:41,  1.73it/s]

Training_loss 46.93666


  3%|▎         | 63/2000 [00:34<18:38,  1.73it/s]

Training_loss 46.84056


  3%|▎         | 64/2000 [00:34<18:50,  1.71it/s]

Training_loss 46.71031


  3%|▎         | 65/2000 [00:35<18:22,  1.75it/s]

Training_loss 46.63583


  3%|▎         | 66/2000 [00:35<18:12,  1.77it/s]

Training_loss 46.50308


  3%|▎         | 67/2000 [00:36<17:53,  1.80it/s]

Training_loss 46.32920


  3%|▎         | 68/2000 [00:36<17:48,  1.81it/s]

Training_loss 46.18005


  3%|▎         | 69/2000 [00:37<17:41,  1.82it/s]

Training_loss 46.06087


  4%|▎         | 70/2000 [00:37<14:45,  2.18it/s]

Training_loss 45.94839


  4%|▎         | 71/2000 [00:37<12:47,  2.51it/s]

Training_loss 45.82848


  4%|▎         | 72/2000 [00:38<11:33,  2.78it/s]

Training_loss 45.71049


  4%|▎         | 73/2000 [00:38<11:34,  2.78it/s]

Training_loss 45.54929


  4%|▎         | 74/2000 [00:39<12:57,  2.48it/s]

Training_loss 45.39100


  4%|▍         | 75/2000 [00:39<14:41,  2.18it/s]

Training_loss 45.19751


  4%|▍         | 76/2000 [00:40<15:37,  2.05it/s]

Training_loss 45.02728


  4%|▍         | 77/2000 [00:40<16:05,  1.99it/s]

Training_loss 44.88986


  4%|▍         | 78/2000 [00:41<16:23,  1.95it/s]

Training_loss 44.75673


  4%|▍         | 79/2000 [00:41<16:35,  1.93it/s]

Training_loss 44.56197


  4%|▍         | 80/2000 [00:42<17:27,  1.83it/s]

Training_loss 44.45908


  4%|▍         | 81/2000 [00:42<17:42,  1.81it/s]

Training_loss 44.28400


  4%|▍         | 82/2000 [00:43<17:32,  1.82it/s]

Training_loss 44.10706


  4%|▍         | 83/2000 [00:44<18:01,  1.77it/s]

Training_loss 43.97308


  4%|▍         | 84/2000 [00:44<18:03,  1.77it/s]

Training_loss 43.82450


  4%|▍         | 85/2000 [00:45<17:42,  1.80it/s]

Training_loss 43.70168


  4%|▍         | 86/2000 [00:45<17:36,  1.81it/s]

Training_loss 43.57644


  4%|▍         | 87/2000 [00:46<17:21,  1.84it/s]

Training_loss 43.42443


  4%|▍         | 88/2000 [00:46<17:09,  1.86it/s]

Training_loss 43.27765


  4%|▍         | 89/2000 [00:47<17:40,  1.80it/s]

Training_loss 43.13688


  4%|▍         | 90/2000 [00:47<17:52,  1.78it/s]

Training_loss 43.01208


  5%|▍         | 91/2000 [00:48<17:58,  1.77it/s]

Training_loss 42.80071


  5%|▍         | 92/2000 [00:49<17:55,  1.77it/s]

Training_loss 42.58797


  5%|▍         | 93/2000 [00:49<18:15,  1.74it/s]

Training_loss 42.40963


  5%|▍         | 94/2000 [00:50<18:42,  1.70it/s]

Training_loss 42.20005


  5%|▍         | 95/2000 [00:50<18:12,  1.74it/s]

Training_loss 42.03570


  5%|▍         | 96/2000 [00:51<18:07,  1.75it/s]

Training_loss 41.89719


  5%|▍         | 97/2000 [00:51<17:42,  1.79it/s]

Training_loss 41.74498


  5%|▍         | 98/2000 [00:52<17:46,  1.78it/s]

Training_loss 41.56564


  5%|▍         | 99/2000 [00:53<18:01,  1.76it/s]

Training_loss 41.41620


  5%|▌         | 100/2000 [00:53<16:44,  1.89it/s]

Training_loss 41.30458


  5%|▌         | 101/2000 [00:53<14:41,  2.16it/s]

Training_loss 41.11735


  5%|▌         | 102/2000 [00:54<13:05,  2.42it/s]

Training_loss 40.93425


  5%|▌         | 103/2000 [00:54<12:36,  2.51it/s]

Training_loss 40.74720


  5%|▌         | 104/2000 [00:55<14:17,  2.21it/s]

Training_loss 40.55058


  5%|▌         | 105/2000 [00:55<15:42,  2.01it/s]

Training_loss 40.35986


  5%|▌         | 106/2000 [00:56<15:53,  1.99it/s]

Training_loss 40.17724


  5%|▌         | 107/2000 [00:56<16:00,  1.97it/s]

Training_loss 40.00615


  5%|▌         | 108/2000 [00:57<16:46,  1.88it/s]

Training_loss 39.81226


  5%|▌         | 109/2000 [00:57<17:23,  1.81it/s]

Training_loss 39.65482


  6%|▌         | 110/2000 [00:58<17:37,  1.79it/s]

Training_loss 39.48946


  6%|▌         | 111/2000 [00:59<17:13,  1.83it/s]

Training_loss 39.34392


  6%|▌         | 112/2000 [00:59<17:02,  1.85it/s]

Training_loss 39.18150


  6%|▌         | 113/2000 [01:00<16:51,  1.86it/s]

Training_loss 39.00502


  6%|▌         | 114/2000 [01:00<17:08,  1.83it/s]

Training_loss 38.82779


  6%|▌         | 115/2000 [01:01<16:43,  1.88it/s]

Training_loss 38.65213


  6%|▌         | 116/2000 [01:01<16:23,  1.91it/s]

Training_loss 38.46015


  6%|▌         | 117/2000 [01:02<17:17,  1.81it/s]

Training_loss 38.26949


  6%|▌         | 118/2000 [01:02<17:08,  1.83it/s]

Training_loss 38.11244


  6%|▌         | 119/2000 [01:03<17:18,  1.81it/s]

Training_loss 37.95718


  6%|▌         | 120/2000 [01:03<16:58,  1.85it/s]

Training_loss 37.74904


  6%|▌         | 121/2000 [01:04<17:23,  1.80it/s]

Training_loss 37.54256


  6%|▌         | 122/2000 [01:04<16:59,  1.84it/s]

Training_loss 37.31896


  6%|▌         | 123/2000 [01:05<17:01,  1.84it/s]

Training_loss 37.16221


  6%|▌         | 124/2000 [01:06<17:00,  1.84it/s]

Training_loss 36.99782


  6%|▋         | 125/2000 [01:06<17:27,  1.79it/s]

Training_loss 36.90160


  6%|▋         | 126/2000 [01:07<17:23,  1.80it/s]

Training_loss 36.68633


  6%|▋         | 127/2000 [01:07<17:39,  1.77it/s]

Training_loss 36.52809


  6%|▋         | 128/2000 [01:08<17:33,  1.78it/s]

Training_loss 36.33396


  6%|▋         | 129/2000 [01:08<17:40,  1.76it/s]

Training_loss 36.14213


  6%|▋         | 130/2000 [01:09<17:25,  1.79it/s]

Training_loss 36.00296


  7%|▋         | 131/2000 [01:09<14:48,  2.10it/s]

Training_loss 35.89156


  7%|▋         | 132/2000 [01:10<13:06,  2.37it/s]

Training_loss 35.71940


  7%|▋         | 133/2000 [01:10<11:27,  2.71it/s]

Training_loss 35.52559


  7%|▋         | 134/2000 [01:10<12:19,  2.52it/s]

Training_loss 35.35956


  7%|▋         | 135/2000 [01:11<13:53,  2.24it/s]

Training_loss 35.19851


  7%|▋         | 136/2000 [01:11<14:55,  2.08it/s]

Training_loss 35.04802


  7%|▋         | 137/2000 [01:12<15:39,  1.98it/s]

Training_loss 34.93495


  7%|▋         | 138/2000 [01:12<15:59,  1.94it/s]

Training_loss 34.71684


  7%|▋         | 139/2000 [01:13<16:32,  1.88it/s]

Training_loss 34.53234


  7%|▋         | 140/2000 [01:14<16:40,  1.86it/s]

Training_loss 34.43512


  7%|▋         | 141/2000 [01:14<16:53,  1.83it/s]

Training_loss 34.29930


  7%|▋         | 142/2000 [01:15<17:05,  1.81it/s]

Training_loss 34.12560


  7%|▋         | 143/2000 [01:15<17:51,  1.73it/s]

Training_loss 33.99874


  7%|▋         | 144/2000 [01:16<17:40,  1.75it/s]

Training_loss 33.82139


  7%|▋         | 145/2000 [01:17<17:43,  1.74it/s]

Training_loss 33.65031


  7%|▋         | 146/2000 [01:17<17:35,  1.76it/s]

Training_loss 33.48246


  7%|▋         | 147/2000 [01:18<17:36,  1.75it/s]

Training_loss 33.28413


  7%|▋         | 148/2000 [01:18<17:39,  1.75it/s]

Training_loss 33.17219


  7%|▋         | 149/2000 [01:19<17:43,  1.74it/s]

Training_loss 33.06283


  8%|▊         | 150/2000 [01:19<17:18,  1.78it/s]

Training_loss 32.88395


  8%|▊         | 151/2000 [01:20<17:13,  1.79it/s]

Training_loss 32.70149


  8%|▊         | 152/2000 [01:20<17:10,  1.79it/s]

Training_loss 32.54521


  8%|▊         | 153/2000 [01:21<17:29,  1.76it/s]

Training_loss 32.39519


  8%|▊         | 154/2000 [01:22<17:15,  1.78it/s]

Training_loss 32.22584


  8%|▊         | 155/2000 [01:22<17:25,  1.77it/s]

Training_loss 32.06135


  8%|▊         | 156/2000 [01:23<17:06,  1.80it/s]

Training_loss 31.85597


  8%|▊         | 157/2000 [01:23<17:16,  1.78it/s]

Training_loss 31.71247


  8%|▊         | 158/2000 [01:24<17:30,  1.75it/s]

Training_loss 31.58401


  8%|▊         | 159/2000 [01:24<17:57,  1.71it/s]

Training_loss 31.45951


  8%|▊         | 160/2000 [01:25<17:18,  1.77it/s]

Training_loss 31.30740


  8%|▊         | 161/2000 [01:25<14:32,  2.11it/s]

Training_loss 31.17544


  8%|▊         | 162/2000 [01:26<12:47,  2.39it/s]

Training_loss 31.02400


  8%|▊         | 163/2000 [01:26<12:20,  2.48it/s]

Training_loss 30.90160


  8%|▊         | 164/2000 [01:26<13:15,  2.31it/s]

Training_loss 30.75008


  8%|▊         | 165/2000 [01:27<14:21,  2.13it/s]

Training_loss 30.56111


  8%|▊         | 166/2000 [01:28<15:16,  2.00it/s]

Training_loss 30.43520


  8%|▊         | 167/2000 [01:28<15:59,  1.91it/s]

Training_loss 30.33660


  8%|▊         | 168/2000 [01:29<16:41,  1.83it/s]

Training_loss 30.18210


  8%|▊         | 169/2000 [01:29<17:29,  1.75it/s]

Training_loss 30.03294


  8%|▊         | 170/2000 [01:30<17:53,  1.71it/s]

Training_loss 29.91356


  9%|▊         | 171/2000 [01:30<17:16,  1.77it/s]

Training_loss 29.76876


  9%|▊         | 172/2000 [01:31<16:50,  1.81it/s]

Training_loss 29.59552


  9%|▊         | 173/2000 [01:32<16:41,  1.82it/s]

Training_loss 29.47471


  9%|▊         | 174/2000 [01:32<17:07,  1.78it/s]

Training_loss 29.35157


  9%|▉         | 175/2000 [01:33<17:12,  1.77it/s]

Training_loss 29.20661


  9%|▉         | 176/2000 [01:33<17:02,  1.78it/s]

Training_loss 29.04370


  9%|▉         | 177/2000 [01:34<17:05,  1.78it/s]

Training_loss 28.96756


  9%|▉         | 178/2000 [01:34<17:03,  1.78it/s]

Training_loss 28.81017


  9%|▉         | 179/2000 [01:35<16:44,  1.81it/s]

Training_loss 28.65651


  9%|▉         | 180/2000 [01:36<17:04,  1.78it/s]

Training_loss 28.59655


  9%|▉         | 181/2000 [01:36<16:45,  1.81it/s]

Training_loss 28.45588


  9%|▉         | 182/2000 [01:37<17:11,  1.76it/s]

Training_loss 28.32931


  9%|▉         | 183/2000 [01:37<17:20,  1.75it/s]

Training_loss 28.19968


  9%|▉         | 184/2000 [01:38<16:55,  1.79it/s]

Training_loss 28.09595


  9%|▉         | 185/2000 [01:38<16:40,  1.81it/s]

Training_loss 27.98674


  9%|▉         | 186/2000 [01:39<17:07,  1.77it/s]

Training_loss 27.86426


  9%|▉         | 187/2000 [01:39<16:31,  1.83it/s]

Training_loss 27.75141


  9%|▉         | 188/2000 [01:40<16:16,  1.86it/s]

Training_loss 27.64669


  9%|▉         | 189/2000 [01:40<16:30,  1.83it/s]

Training_loss 27.51634


 10%|▉         | 190/2000 [01:41<16:22,  1.84it/s]

Training_loss 27.42027


 10%|▉         | 191/2000 [01:41<13:43,  2.20it/s]

Training_loss 27.29830


 10%|▉         | 192/2000 [01:42<12:05,  2.49it/s]

Training_loss 27.15446


 10%|▉         | 193/2000 [01:42<10:55,  2.76it/s]

Training_loss 27.04239


 10%|▉         | 194/2000 [01:42<11:23,  2.64it/s]

Training_loss 26.94973


 10%|▉         | 195/2000 [01:43<12:22,  2.43it/s]

Training_loss 26.84006


 10%|▉         | 196/2000 [01:43<13:49,  2.17it/s]

Training_loss 26.72896


 10%|▉         | 197/2000 [01:44<14:28,  2.08it/s]

Training_loss 26.66417


 10%|▉         | 198/2000 [01:44<15:17,  1.96it/s]

Training_loss 26.56521


 10%|▉         | 199/2000 [01:45<15:31,  1.93it/s]

Training_loss 26.48487


 10%|█         | 200/2000 [01:46<16:11,  1.85it/s]

Training_loss 26.39066


 10%|█         | 201/2000 [01:46<16:17,  1.84it/s]

Training_loss 26.32578


 10%|█         | 202/2000 [01:47<16:28,  1.82it/s]

Training_loss 26.21819


 10%|█         | 203/2000 [01:47<16:19,  1.84it/s]

Training_loss 26.07738


 10%|█         | 204/2000 [01:47<14:04,  2.13it/s]

Training_loss 25.97466


 10%|█         | 205/2000 [01:48<13:08,  2.28it/s]

Training_loss 25.86726


 10%|█         | 206/2000 [01:48<13:53,  2.15it/s]

Training_loss 25.76411


 10%|█         | 207/2000 [01:49<14:46,  2.02it/s]

Training_loss 25.65074


 10%|█         | 208/2000 [01:49<14:25,  2.07it/s]

Training_loss 25.52576


 10%|█         | 209/2000 [01:50<14:04,  2.12it/s]

Training_loss 25.41437


 10%|█         | 210/2000 [01:50<13:25,  2.22it/s]

Training_loss 25.29405


 11%|█         | 211/2000 [01:51<13:55,  2.14it/s]

Training_loss 25.18852


 11%|█         | 212/2000 [01:51<15:29,  1.92it/s]

Training_loss 25.11234


 11%|█         | 213/2000 [01:52<16:37,  1.79it/s]

Training_loss 24.97796


 11%|█         | 214/2000 [01:53<17:20,  1.72it/s]

Training_loss 24.84677


 11%|█         | 215/2000 [01:53<17:48,  1.67it/s]

Training_loss 24.71558


 11%|█         | 216/2000 [01:54<18:15,  1.63it/s]

Training_loss 24.63653


 11%|█         | 217/2000 [01:54<16:23,  1.81it/s]

Training_loss 24.49738


 11%|█         | 218/2000 [01:55<16:06,  1.84it/s]

Training_loss 24.41281


 11%|█         | 219/2000 [01:55<16:53,  1.76it/s]

Training_loss 24.33159


 11%|█         | 220/2000 [01:56<17:38,  1.68it/s]

Training_loss 24.23090


 11%|█         | 221/2000 [01:57<17:57,  1.65it/s]

Training_loss 24.11769


 11%|█         | 222/2000 [01:57<16:24,  1.81it/s]

Training_loss 24.02697


 11%|█         | 223/2000 [01:58<14:47,  2.00it/s]

Training_loss 23.94407


 11%|█         | 224/2000 [01:58<13:19,  2.22it/s]

Training_loss 23.86730


 11%|█▏        | 225/2000 [01:58<12:58,  2.28it/s]

Training_loss 23.78634


 11%|█▏        | 226/2000 [01:59<13:39,  2.16it/s]

Training_loss 23.71575


 11%|█▏        | 227/2000 [01:59<15:09,  1.95it/s]

Training_loss 23.66434


 11%|█▏        | 228/2000 [02:00<15:56,  1.85it/s]

Training_loss 23.59095


 11%|█▏        | 229/2000 [02:01<16:39,  1.77it/s]

Training_loss 23.48225


 12%|█▏        | 230/2000 [02:01<16:48,  1.76it/s]

Training_loss 23.40751


 12%|█▏        | 231/2000 [02:02<14:36,  2.02it/s]

Training_loss 23.31723


 12%|█▏        | 232/2000 [02:02<14:14,  2.07it/s]

Training_loss 23.19673


 12%|█▏        | 233/2000 [02:03<15:22,  1.92it/s]

Training_loss 23.12732


 12%|█▏        | 234/2000 [02:03<16:01,  1.84it/s]

Training_loss 23.03563


 12%|█▏        | 235/2000 [02:04<16:14,  1.81it/s]

Training_loss 22.92299


 12%|█▏        | 236/2000 [02:04<16:48,  1.75it/s]

Training_loss 22.81363


 12%|█▏        | 237/2000 [02:05<16:44,  1.75it/s]

Training_loss 22.73049


 12%|█▏        | 238/2000 [02:06<17:08,  1.71it/s]

Training_loss 22.61651


 12%|█▏        | 239/2000 [02:06<17:26,  1.68it/s]

Training_loss 22.51093


 12%|█▏        | 240/2000 [02:07<17:37,  1.66it/s]

Training_loss 22.41104


 12%|█▏        | 241/2000 [02:07<17:24,  1.68it/s]

Training_loss 22.36845


 12%|█▏        | 242/2000 [02:08<16:59,  1.72it/s]

Training_loss 22.24385


 12%|█▏        | 243/2000 [02:09<16:58,  1.73it/s]

Training_loss 22.15000


 12%|█▏        | 244/2000 [02:09<16:34,  1.77it/s]

Training_loss 22.02575


 12%|█▏        | 245/2000 [02:10<17:10,  1.70it/s]

Training_loss 21.95325


 12%|█▏        | 246/2000 [02:10<16:58,  1.72it/s]

Training_loss 21.83702


 12%|█▏        | 247/2000 [02:11<16:47,  1.74it/s]

Training_loss 21.75900


 12%|█▏        | 248/2000 [02:11<16:26,  1.78it/s]

Training_loss 21.64838


 12%|█▏        | 249/2000 [02:12<16:25,  1.78it/s]

Training_loss 21.58726


 12%|█▎        | 250/2000 [02:13<16:36,  1.76it/s]

Training_loss 21.48806


 13%|█▎        | 251/2000 [02:13<16:06,  1.81it/s]

Training_loss 21.40716


 13%|█▎        | 252/2000 [02:13<13:22,  2.18it/s]

Training_loss 21.36576


 13%|█▎        | 253/2000 [02:14<12:11,  2.39it/s]

Training_loss 21.26944


 13%|█▎        | 254/2000 [02:14<11:29,  2.53it/s]

Training_loss 21.19951


 13%|█▎        | 255/2000 [02:14<11:39,  2.50it/s]

Training_loss 21.13746


 13%|█▎        | 256/2000 [02:15<13:18,  2.19it/s]

Training_loss 21.08132


 13%|█▎        | 257/2000 [02:16<14:19,  2.03it/s]

Training_loss 21.00276


 13%|█▎        | 258/2000 [02:16<14:46,  1.97it/s]

Training_loss 20.92697


 13%|█▎        | 259/2000 [02:17<15:00,  1.93it/s]

Training_loss 20.82335


 13%|█▎        | 260/2000 [02:17<15:15,  1.90it/s]

Training_loss 20.76098


 13%|█▎        | 261/2000 [02:18<15:30,  1.87it/s]

Training_loss 20.68139


 13%|█▎        | 262/2000 [02:18<16:15,  1.78it/s]

Training_loss 20.56679


 13%|█▎        | 263/2000 [02:19<16:26,  1.76it/s]

Training_loss 20.46097


 13%|█▎        | 264/2000 [02:20<16:25,  1.76it/s]

Training_loss 20.38940


 13%|█▎        | 265/2000 [02:20<16:20,  1.77it/s]

Training_loss 20.30618


 13%|█▎        | 266/2000 [02:21<16:30,  1.75it/s]

Training_loss 20.23354


 13%|█▎        | 267/2000 [02:21<16:43,  1.73it/s]

Training_loss 20.18781


 13%|█▎        | 268/2000 [02:22<16:33,  1.74it/s]

Training_loss 20.10784


 13%|█▎        | 269/2000 [02:22<16:15,  1.78it/s]

Training_loss 20.01155


 14%|█▎        | 270/2000 [02:23<16:28,  1.75it/s]

Training_loss 19.96278


 14%|█▎        | 271/2000 [02:24<16:24,  1.76it/s]

Training_loss 19.86481


 14%|█▎        | 272/2000 [02:24<15:39,  1.84it/s]

Training_loss 19.83122


 14%|█▎        | 273/2000 [02:25<16:02,  1.79it/s]

Training_loss 19.70885


 14%|█▎        | 274/2000 [02:25<16:21,  1.76it/s]

Training_loss 19.63295


 14%|█▍        | 275/2000 [02:26<16:26,  1.75it/s]

Training_loss 19.55013


 14%|█▍        | 276/2000 [02:26<16:34,  1.73it/s]

Training_loss 19.50691


 14%|█▍        | 277/2000 [02:27<16:25,  1.75it/s]

Training_loss 19.43426


 14%|█▍        | 278/2000 [02:27<16:33,  1.73it/s]

Training_loss 19.32980


 14%|█▍        | 279/2000 [02:28<16:34,  1.73it/s]

Training_loss 19.26245


 14%|█▍        | 280/2000 [02:29<16:43,  1.71it/s]

Training_loss 19.20747


 14%|█▍        | 281/2000 [02:29<15:29,  1.85it/s]

Training_loss 19.12377


 14%|█▍        | 282/2000 [02:29<13:13,  2.17it/s]

Training_loss 19.05544


 14%|█▍        | 283/2000 [02:30<12:05,  2.37it/s]

Training_loss 18.97471


 14%|█▍        | 284/2000 [02:30<11:19,  2.53it/s]

Training_loss 18.92020


 14%|█▍        | 285/2000 [02:31<13:00,  2.20it/s]

Training_loss 18.86175


 14%|█▍        | 286/2000 [02:31<14:08,  2.02it/s]

Training_loss 18.76370


 14%|█▍        | 287/2000 [02:32<14:28,  1.97it/s]

Training_loss 18.71482


 14%|█▍        | 288/2000 [02:32<14:59,  1.90it/s]

Training_loss 18.64807


 14%|█▍        | 289/2000 [02:33<15:18,  1.86it/s]

Training_loss 18.55499


 14%|█▍        | 290/2000 [02:34<15:51,  1.80it/s]

Training_loss 18.49187


 15%|█▍        | 291/2000 [02:34<15:57,  1.78it/s]

Training_loss 18.42956


 15%|█▍        | 292/2000 [02:35<16:26,  1.73it/s]

Training_loss 18.36230


 15%|█▍        | 293/2000 [02:35<16:38,  1.71it/s]

Training_loss 18.27431


 15%|█▍        | 294/2000 [02:36<16:17,  1.74it/s]

Training_loss 18.18883


 15%|█▍        | 295/2000 [02:36<16:56,  1.68it/s]

Training_loss 18.14849


 15%|█▍        | 296/2000 [02:37<17:14,  1.65it/s]

Training_loss 18.07707


 15%|█▍        | 297/2000 [02:38<17:07,  1.66it/s]

Training_loss 18.00534


 15%|█▍        | 298/2000 [02:38<16:51,  1.68it/s]

Training_loss 17.95648


 15%|█▍        | 299/2000 [02:39<16:39,  1.70it/s]

Training_loss 17.88603


 15%|█▌        | 300/2000 [02:39<16:56,  1.67it/s]

Training_loss 17.83616


 15%|█▌        | 301/2000 [02:40<17:15,  1.64it/s]

Training_loss 17.77328


 15%|█▌        | 302/2000 [02:41<17:36,  1.61it/s]

Training_loss 17.71649


 15%|█▌        | 303/2000 [02:41<15:37,  1.81it/s]

Training_loss 17.70346


 15%|█▌        | 304/2000 [02:42<13:55,  2.03it/s]

Training_loss 17.66383


 15%|█▌        | 305/2000 [02:42<14:40,  1.93it/s]

Training_loss 17.61917


 15%|█▌        | 306/2000 [02:43<15:10,  1.86it/s]

Training_loss 17.57434


 15%|█▌        | 307/2000 [02:43<15:23,  1.83it/s]

Training_loss 17.51866


 15%|█▌        | 308/2000 [02:44<15:26,  1.83it/s]

Training_loss 17.47556


 15%|█▌        | 309/2000 [02:44<15:50,  1.78it/s]

Training_loss 17.43777


 16%|█▌        | 310/2000 [02:45<15:40,  1.80it/s]

Training_loss 17.36513


 16%|█▌        | 311/2000 [02:45<13:28,  2.09it/s]

Training_loss 17.32147


 16%|█▌        | 312/2000 [02:46<11:46,  2.39it/s]

Training_loss 17.27460


 16%|█▌        | 313/2000 [02:46<10:35,  2.65it/s]

Training_loss 17.23645


 16%|█▌        | 314/2000 [02:46<11:04,  2.54it/s]

Training_loss 17.20139


 16%|█▌        | 315/2000 [02:47<12:59,  2.16it/s]

Training_loss 17.14460


 16%|█▌        | 316/2000 [02:47<13:57,  2.01it/s]

Training_loss 17.10733


 16%|█▌        | 317/2000 [02:48<14:09,  1.98it/s]

Training_loss 17.05345


 16%|█▌        | 318/2000 [02:48<14:23,  1.95it/s]

Training_loss 17.01248


 16%|█▌        | 319/2000 [02:49<14:24,  1.94it/s]

Training_loss 16.97637


 16%|█▌        | 320/2000 [02:50<15:04,  1.86it/s]

Training_loss 16.92861


 16%|█▌        | 321/2000 [02:50<15:13,  1.84it/s]

Training_loss 16.90311


 16%|█▌        | 322/2000 [02:51<15:41,  1.78it/s]

Training_loss 16.83917


 16%|█▌        | 323/2000 [02:51<15:49,  1.77it/s]

Training_loss 16.81562


 16%|█▌        | 324/2000 [02:52<15:33,  1.80it/s]

Training_loss 16.79546


 16%|█▋        | 325/2000 [02:52<15:51,  1.76it/s]

Training_loss 16.73954


 16%|█▋        | 326/2000 [02:53<15:35,  1.79it/s]

Training_loss 16.70366


 16%|█▋        | 327/2000 [02:54<16:09,  1.73it/s]

Training_loss 16.65863


 16%|█▋        | 328/2000 [02:54<16:36,  1.68it/s]

Training_loss 16.61901


 16%|█▋        | 329/2000 [02:55<16:40,  1.67it/s]

Training_loss 16.58395


 16%|█▋        | 330/2000 [02:55<16:12,  1.72it/s]

Training_loss 16.54758


 17%|█▋        | 331/2000 [02:56<16:49,  1.65it/s]

Training_loss 16.52321


 17%|█▋        | 332/2000 [02:57<15:53,  1.75it/s]

Training_loss 16.50213


 17%|█▋        | 333/2000 [02:57<16:14,  1.71it/s]

Training_loss 16.47852


 17%|█▋        | 334/2000 [02:58<16:35,  1.67it/s]

Training_loss 16.42779


 17%|█▋        | 335/2000 [02:58<17:13,  1.61it/s]

Training_loss 16.40595


 17%|█▋        | 336/2000 [02:59<17:39,  1.57it/s]

Training_loss 16.38407


 17%|█▋        | 337/2000 [03:00<18:21,  1.51it/s]

Training_loss 16.36031


 17%|█▋        | 338/2000 [03:00<18:01,  1.54it/s]

Training_loss 16.33169


 17%|█▋        | 339/2000 [03:01<17:07,  1.62it/s]

Training_loss 16.32833


 17%|█▋        | 340/2000 [03:01<14:17,  1.94it/s]

Training_loss 16.29013


 17%|█▋        | 341/2000 [03:02<12:36,  2.19it/s]

Training_loss 16.28001


 17%|█▋        | 342/2000 [03:02<11:14,  2.46it/s]

Training_loss 16.26396


 17%|█▋        | 343/2000 [03:02<11:11,  2.47it/s]

Training_loss 16.23858


 17%|█▋        | 344/2000 [03:03<12:30,  2.21it/s]

Training_loss 16.21638


 17%|█▋        | 345/2000 [03:03<12:54,  2.14it/s]

Training_loss 16.18254


 17%|█▋        | 346/2000 [03:04<13:59,  1.97it/s]

Training_loss 16.16191


 17%|█▋        | 347/2000 [03:05<15:06,  1.82it/s]

Training_loss 16.14425


 17%|█▋        | 348/2000 [03:05<15:22,  1.79it/s]

Training_loss 16.09563


 17%|█▋        | 349/2000 [03:06<15:19,  1.79it/s]

Training_loss 16.06979


 18%|█▊        | 350/2000 [03:06<15:22,  1.79it/s]

Training_loss 16.05330


 18%|█▊        | 351/2000 [03:07<15:46,  1.74it/s]

Training_loss 16.04227


 18%|█▊        | 352/2000 [03:08<15:47,  1.74it/s]

Training_loss 16.01679


 18%|█▊        | 353/2000 [03:08<15:41,  1.75it/s]

Training_loss 15.98337


 18%|█▊        | 354/2000 [03:09<15:48,  1.74it/s]

Training_loss 15.96466


 18%|█▊        | 355/2000 [03:09<15:57,  1.72it/s]

Training_loss 15.93667


 18%|█▊        | 356/2000 [03:10<15:56,  1.72it/s]

Training_loss 15.90218


 18%|█▊        | 357/2000 [03:10<15:36,  1.75it/s]

Training_loss 15.89492


 18%|█▊        | 358/2000 [03:11<15:23,  1.78it/s]

Training_loss 15.88262


 18%|█▊        | 359/2000 [03:12<15:40,  1.74it/s]

Training_loss 15.84031


 18%|█▊        | 360/2000 [03:12<15:34,  1.75it/s]

Training_loss 15.81745


 18%|█▊        | 361/2000 [03:13<15:29,  1.76it/s]

Training_loss 15.78616


 18%|█▊        | 362/2000 [03:13<15:20,  1.78it/s]

Training_loss 15.76906


 18%|█▊        | 363/2000 [03:14<15:27,  1.76it/s]

Training_loss 15.75202


 18%|█▊        | 364/2000 [03:14<15:40,  1.74it/s]

Training_loss 15.71859


 18%|█▊        | 365/2000 [03:15<15:28,  1.76it/s]

Training_loss 15.69647


 18%|█▊        | 366/2000 [03:15<15:18,  1.78it/s]

Training_loss 15.67508


 18%|█▊        | 367/2000 [03:16<15:36,  1.74it/s]

Training_loss 15.64808


 18%|█▊        | 368/2000 [03:17<15:41,  1.73it/s]

Training_loss 15.62617


 18%|█▊        | 369/2000 [03:17<14:22,  1.89it/s]

Training_loss 15.60547


 18%|█▊        | 370/2000 [03:17<12:17,  2.21it/s]

Training_loss 15.58328


 19%|█▊        | 371/2000 [03:18<11:52,  2.29it/s]

Training_loss 15.55457


 19%|█▊        | 372/2000 [03:18<10:37,  2.55it/s]

Training_loss 15.54307


 19%|█▊        | 373/2000 [03:19<11:23,  2.38it/s]

Training_loss 15.51932


 19%|█▊        | 374/2000 [03:19<12:16,  2.21it/s]

Training_loss 15.50167


 19%|█▉        | 375/2000 [03:20<13:01,  2.08it/s]

Training_loss 15.48794


 19%|█▉        | 376/2000 [03:20<13:52,  1.95it/s]

Training_loss 15.46990


 19%|█▉        | 377/2000 [03:21<14:39,  1.85it/s]

Training_loss 15.45177


 19%|█▉        | 378/2000 [03:21<14:35,  1.85it/s]

Training_loss 15.43800


 19%|█▉        | 379/2000 [03:22<14:40,  1.84it/s]

Training_loss 15.41013


 19%|█▉        | 380/2000 [03:22<15:05,  1.79it/s]

Training_loss 15.38224


 19%|█▉        | 381/2000 [03:23<15:25,  1.75it/s]

Training_loss 15.35934


 19%|█▉        | 382/2000 [03:24<15:15,  1.77it/s]

Training_loss 15.33196


 19%|█▉        | 383/2000 [03:24<15:12,  1.77it/s]

Training_loss 15.31273


 19%|█▉        | 384/2000 [03:25<15:03,  1.79it/s]

Training_loss 15.29765


 19%|█▉        | 385/2000 [03:25<15:26,  1.74it/s]

Training_loss 15.28183


 19%|█▉        | 386/2000 [03:26<15:17,  1.76it/s]

Training_loss 15.26892


 19%|█▉        | 387/2000 [03:27<15:32,  1.73it/s]

Training_loss 15.25315


 19%|█▉        | 388/2000 [03:27<15:33,  1.73it/s]

Training_loss 15.23616


 19%|█▉        | 389/2000 [03:28<15:42,  1.71it/s]

Training_loss 15.21188


 20%|█▉        | 390/2000 [03:28<15:47,  1.70it/s]

Training_loss 15.18836


 20%|█▉        | 391/2000 [03:29<14:59,  1.79it/s]

Training_loss 15.18170


 20%|█▉        | 392/2000 [03:29<15:06,  1.77it/s]

Training_loss 15.14485


 20%|█▉        | 393/2000 [03:30<15:32,  1.72it/s]

Training_loss 15.13229


 20%|█▉        | 394/2000 [03:31<15:31,  1.72it/s]

Training_loss 15.11170


 20%|█▉        | 395/2000 [03:31<15:10,  1.76it/s]

Training_loss 15.08741


 20%|█▉        | 396/2000 [03:32<15:17,  1.75it/s]

Training_loss 15.07364


 20%|█▉        | 397/2000 [03:32<15:10,  1.76it/s]

Training_loss 15.06314


 20%|█▉        | 398/2000 [03:33<15:51,  1.68it/s]

Training_loss 15.04171


 20%|█▉        | 399/2000 [03:33<14:15,  1.87it/s]

Training_loss 15.02359


 20%|██        | 400/2000 [03:34<12:48,  2.08it/s]

Training_loss 14.99900


 20%|██        | 401/2000 [03:34<11:27,  2.33it/s]

Training_loss 14.98757


 20%|██        | 402/2000 [03:34<10:43,  2.48it/s]

Training_loss 14.97420


 20%|██        | 403/2000 [03:35<11:33,  2.30it/s]

Training_loss 14.96158


 20%|██        | 404/2000 [03:35<12:51,  2.07it/s]

Training_loss 14.94745


 20%|██        | 405/2000 [03:36<13:40,  1.94it/s]

Training_loss 14.92252


 20%|██        | 406/2000 [03:37<14:32,  1.83it/s]

Training_loss 14.91071


 20%|██        | 407/2000 [03:37<14:40,  1.81it/s]

Training_loss 14.89882


 20%|██        | 408/2000 [03:38<14:55,  1.78it/s]

Training_loss 14.88880


 20%|██        | 409/2000 [03:38<14:49,  1.79it/s]

Training_loss 14.87583


 20%|██        | 410/2000 [03:39<14:33,  1.82it/s]

Training_loss 14.86818


 21%|██        | 411/2000 [03:39<14:45,  1.80it/s]

Training_loss 14.85066


 21%|██        | 412/2000 [03:40<14:49,  1.79it/s]

Training_loss 14.82868


 21%|██        | 413/2000 [03:41<15:12,  1.74it/s]

Training_loss 14.81577


 21%|██        | 414/2000 [03:41<14:53,  1.78it/s]

Training_loss 14.80739


 21%|██        | 415/2000 [03:42<14:40,  1.80it/s]

Training_loss 14.78581


 21%|██        | 416/2000 [03:42<14:33,  1.81it/s]

Training_loss 14.76258


 21%|██        | 417/2000 [03:43<14:45,  1.79it/s]

Training_loss 14.75651


 21%|██        | 418/2000 [03:43<14:12,  1.86it/s]

Training_loss 14.74259


 21%|██        | 419/2000 [03:44<14:24,  1.83it/s]

Training_loss 14.72815


 21%|██        | 420/2000 [03:44<14:19,  1.84it/s]

Training_loss 14.71359


 21%|██        | 421/2000 [03:45<14:23,  1.83it/s]

Training_loss 14.70130


 21%|██        | 422/2000 [03:45<14:32,  1.81it/s]

Training_loss 14.68330


 21%|██        | 423/2000 [03:46<14:25,  1.82it/s]

Training_loss 14.67243


 21%|██        | 424/2000 [03:47<14:33,  1.80it/s]

Training_loss 14.65925


 21%|██▏       | 425/2000 [03:47<14:47,  1.77it/s]

Training_loss 14.64174


 21%|██▏       | 426/2000 [03:48<14:10,  1.85it/s]

Training_loss 14.63028


 21%|██▏       | 427/2000 [03:48<14:34,  1.80it/s]

Training_loss 14.61207


 21%|██▏       | 428/2000 [03:49<15:32,  1.69it/s]

Training_loss 14.61314


 21%|██▏       | 429/2000 [03:50<15:44,  1.66it/s]

Training_loss 14.60563


 22%|██▏       | 430/2000 [03:50<14:28,  1.81it/s]

Training_loss 14.60057


 22%|██▏       | 431/2000 [03:50<13:20,  1.96it/s]

Training_loss 14.60290


 22%|██▏       | 432/2000 [03:51<11:53,  2.20it/s]

Training_loss 14.59390


 22%|██▏       | 433/2000 [03:51<11:13,  2.33it/s]

Training_loss 14.56859


 22%|██▏       | 434/2000 [03:52<12:37,  2.07it/s]

Training_loss 14.53966


 22%|██▏       | 435/2000 [03:52<14:08,  1.84it/s]

Training_loss 14.53857


 22%|██▏       | 436/2000 [03:53<14:49,  1.76it/s]

Training_loss 14.53157


 22%|██▏       | 437/2000 [03:54<14:48,  1.76it/s]

Training_loss 14.51583


 22%|██▏       | 438/2000 [03:54<15:03,  1.73it/s]

Training_loss 14.51101


 22%|██▏       | 439/2000 [03:55<15:20,  1.70it/s]

Training_loss 14.50900


 22%|██▏       | 440/2000 [03:55<15:14,  1.71it/s]

Training_loss 14.49730


 22%|██▏       | 441/2000 [03:56<15:37,  1.66it/s]

Training_loss 14.48924


 22%|██▏       | 442/2000 [03:56<14:35,  1.78it/s]

Training_loss 14.48029


 22%|██▏       | 443/2000 [03:57<14:35,  1.78it/s]

Training_loss 14.47867


 22%|██▏       | 444/2000 [03:57<13:31,  1.92it/s]

Training_loss 14.47031


 22%|██▏       | 445/2000 [03:58<13:41,  1.89it/s]

Training_loss 14.46314


 22%|██▏       | 446/2000 [03:59<13:57,  1.86it/s]

Training_loss 14.45211


 22%|██▏       | 447/2000 [03:59<13:56,  1.86it/s]

Training_loss 14.44010


 22%|██▏       | 448/2000 [04:00<14:21,  1.80it/s]

Training_loss 14.43415


 22%|██▏       | 449/2000 [04:00<14:41,  1.76it/s]

Training_loss 14.42406


 22%|██▎       | 450/2000 [04:01<14:24,  1.79it/s]

Training_loss 14.41124


 23%|██▎       | 451/2000 [04:01<14:18,  1.80it/s]

Training_loss 14.40535


 23%|██▎       | 452/2000 [04:02<14:21,  1.80it/s]

Training_loss 14.39620


 23%|██▎       | 453/2000 [04:03<14:55,  1.73it/s]

Training_loss 14.38910


 23%|██▎       | 454/2000 [04:03<15:24,  1.67it/s]

Training_loss 14.37892


 23%|██▎       | 455/2000 [04:04<15:12,  1.69it/s]

Training_loss 14.37643


 23%|██▎       | 456/2000 [04:04<14:58,  1.72it/s]

Training_loss 14.37100


 23%|██▎       | 457/2000 [04:05<14:38,  1.76it/s]

Training_loss 14.36321


 23%|██▎       | 458/2000 [04:05<13:07,  1.96it/s]

Training_loss 14.35614


 23%|██▎       | 459/2000 [04:06<11:19,  2.27it/s]

Training_loss 14.34520


 23%|██▎       | 460/2000 [04:06<10:06,  2.54it/s]

Training_loss 14.33697


 23%|██▎       | 461/2000 [04:06<09:51,  2.60it/s]

Training_loss 14.31325


 23%|██▎       | 462/2000 [04:07<10:59,  2.33it/s]

Training_loss 14.30741


 23%|██▎       | 463/2000 [04:07<11:56,  2.15it/s]

Training_loss 14.31058


 23%|██▎       | 464/2000 [04:08<12:57,  1.97it/s]

Training_loss 14.29961


 23%|██▎       | 465/2000 [04:09<13:53,  1.84it/s]

Training_loss 14.28878


 23%|██▎       | 466/2000 [04:09<14:25,  1.77it/s]

Training_loss 14.28095


 23%|██▎       | 467/2000 [04:10<14:46,  1.73it/s]

Training_loss 14.27073


 23%|██▎       | 468/2000 [04:10<15:24,  1.66it/s]

Training_loss 14.25609


 23%|██▎       | 469/2000 [04:11<15:19,  1.67it/s]

Training_loss 14.24593


 24%|██▎       | 470/2000 [04:12<15:32,  1.64it/s]

Training_loss 14.24261


 24%|██▎       | 471/2000 [04:12<15:43,  1.62it/s]

Training_loss 14.23707


 24%|██▎       | 472/2000 [04:13<15:45,  1.62it/s]

Training_loss 14.23171


 24%|██▎       | 473/2000 [04:13<15:38,  1.63it/s]

Training_loss 14.22627


 24%|██▎       | 474/2000 [04:14<16:30,  1.54it/s]

Training_loss 14.21710


 24%|██▍       | 475/2000 [04:15<16:32,  1.54it/s]

Training_loss 14.20490


 24%|██▍       | 476/2000 [04:15<16:04,  1.58it/s]

Training_loss 14.19269


 24%|██▍       | 477/2000 [04:16<15:15,  1.66it/s]

Training_loss 14.18409


 24%|██▍       | 478/2000 [04:17<14:57,  1.70it/s]

Training_loss 14.18189


 24%|██▍       | 479/2000 [04:17<14:18,  1.77it/s]

Training_loss 14.17954


 24%|██▍       | 480/2000 [04:18<13:53,  1.82it/s]

Training_loss 14.16882


 24%|██▍       | 481/2000 [04:18<13:54,  1.82it/s]

Training_loss 14.16065


 24%|██▍       | 482/2000 [04:19<13:53,  1.82it/s]

Training_loss 14.15225


 24%|██▍       | 483/2000 [04:19<14:23,  1.76it/s]

Training_loss 14.14404


 24%|██▍       | 484/2000 [04:20<14:40,  1.72it/s]

Training_loss 14.13265


 24%|██▍       | 485/2000 [04:21<15:01,  1.68it/s]

Training_loss 14.12238


 24%|██▍       | 486/2000 [04:21<14:39,  1.72it/s]

Training_loss 14.11541


 24%|██▍       | 487/2000 [04:22<14:22,  1.75it/s]

Training_loss 14.11330


 24%|██▍       | 488/2000 [04:22<12:43,  1.98it/s]

Training_loss 14.10846


 24%|██▍       | 489/2000 [04:22<12:23,  2.03it/s]

Training_loss 14.10353


 24%|██▍       | 490/2000 [04:23<13:19,  1.89it/s]

Training_loss 14.09801


 25%|██▍       | 491/2000 [04:24<12:43,  1.98it/s]

Training_loss 14.09178


 25%|██▍       | 492/2000 [04:24<11:52,  2.12it/s]

Training_loss 14.08444


 25%|██▍       | 493/2000 [04:24<12:10,  2.06it/s]

Training_loss 14.07233


 25%|██▍       | 494/2000 [04:25<12:20,  2.03it/s]

Training_loss 14.06398


 25%|██▍       | 495/2000 [04:25<12:26,  2.02it/s]

Training_loss 14.05743


 25%|██▍       | 496/2000 [04:26<13:23,  1.87it/s]

Training_loss 14.05082


 25%|██▍       | 497/2000 [04:27<13:41,  1.83it/s]

Training_loss 14.04907


 25%|██▍       | 498/2000 [04:27<13:53,  1.80it/s]

Training_loss 14.04684


 25%|██▍       | 499/2000 [04:28<13:44,  1.82it/s]

Training_loss 14.03642


 25%|██▌       | 500/2000 [04:28<14:00,  1.79it/s]

Training_loss 14.03181


 25%|██▌       | 501/2000 [04:29<13:37,  1.83it/s]

Training_loss 14.02298


 25%|██▌       | 502/2000 [04:29<13:39,  1.83it/s]

Training_loss 14.02007


 25%|██▌       | 503/2000 [04:30<13:35,  1.84it/s]

Training_loss 14.01502


 25%|██▌       | 504/2000 [04:31<13:57,  1.79it/s]

Training_loss 14.00302


 25%|██▌       | 505/2000 [04:31<13:37,  1.83it/s]

Training_loss 13.99933


 25%|██▌       | 506/2000 [04:32<13:42,  1.82it/s]

Training_loss 13.99089


 25%|██▌       | 507/2000 [04:32<13:46,  1.81it/s]

Training_loss 13.97679


 25%|██▌       | 508/2000 [04:33<14:13,  1.75it/s]

Training_loss 13.97081


 25%|██▌       | 509/2000 [04:33<14:15,  1.74it/s]

Training_loss 13.96489


 26%|██▌       | 510/2000 [04:34<14:25,  1.72it/s]

Training_loss 13.95871


 26%|██▌       | 511/2000 [04:35<14:18,  1.73it/s]

Training_loss 13.95575


 26%|██▌       | 512/2000 [04:35<14:19,  1.73it/s]

Training_loss 13.95010


 26%|██▌       | 513/2000 [04:36<14:09,  1.75it/s]

Training_loss 13.93953


 26%|██▌       | 514/2000 [04:36<14:12,  1.74it/s]

Training_loss 13.93130


 26%|██▌       | 515/2000 [04:37<14:21,  1.72it/s]

Training_loss 13.93082


 26%|██▌       | 516/2000 [04:37<14:14,  1.74it/s]

Training_loss 13.92770


 26%|██▌       | 517/2000 [04:38<12:16,  2.01it/s]

Training_loss 13.91996


 26%|██▌       | 518/2000 [04:38<11:19,  2.18it/s]

Training_loss 13.90937


 26%|██▌       | 519/2000 [04:38<10:34,  2.33it/s]

Training_loss 13.90301


 26%|██▌       | 520/2000 [04:39<10:47,  2.29it/s]

Training_loss 13.89863


 26%|██▌       | 521/2000 [04:39<11:43,  2.10it/s]

Training_loss 13.89547


 26%|██▌       | 522/2000 [04:40<12:08,  2.03it/s]

Training_loss 13.89427


 26%|██▌       | 523/2000 [04:41<12:59,  1.90it/s]

Training_loss 13.88840


 26%|██▌       | 524/2000 [04:41<13:21,  1.84it/s]

Training_loss 13.88058


 26%|██▋       | 525/2000 [04:42<13:29,  1.82it/s]

Training_loss 13.87687


 26%|██▋       | 526/2000 [04:42<12:58,  1.89it/s]

Training_loss 13.87238


 26%|██▋       | 527/2000 [04:43<13:09,  1.87it/s]

Training_loss 13.86347


 26%|██▋       | 528/2000 [04:43<13:25,  1.83it/s]

Training_loss 13.86131


 26%|██▋       | 529/2000 [04:44<13:18,  1.84it/s]

Training_loss 13.85749


 26%|██▋       | 530/2000 [04:44<13:39,  1.79it/s]

Training_loss 13.85092


 27%|██▋       | 531/2000 [04:45<14:06,  1.74it/s]

Training_loss 13.84875


 27%|██▋       | 532/2000 [04:46<13:51,  1.76it/s]

Training_loss 13.84520


 27%|██▋       | 533/2000 [04:46<13:36,  1.80it/s]

Training_loss 13.84040


 27%|██▋       | 534/2000 [04:47<13:38,  1.79it/s]

Training_loss 13.83598


 27%|██▋       | 535/2000 [04:47<13:46,  1.77it/s]

Training_loss 13.83302


 27%|██▋       | 536/2000 [04:48<13:39,  1.79it/s]

Training_loss 13.82847


 27%|██▋       | 537/2000 [04:48<14:01,  1.74it/s]

Training_loss 13.82639


 27%|██▋       | 538/2000 [04:49<14:25,  1.69it/s]

Training_loss 13.81990


 27%|██▋       | 539/2000 [04:50<14:20,  1.70it/s]

Training_loss 13.81532


 27%|██▋       | 540/2000 [04:50<14:18,  1.70it/s]

Training_loss 13.81486


 27%|██▋       | 541/2000 [04:51<12:52,  1.89it/s]

Training_loss 13.80766


 27%|██▋       | 542/2000 [04:51<12:11,  1.99it/s]

Training_loss 13.79749


 27%|██▋       | 543/2000 [04:52<12:54,  1.88it/s]

Training_loss 13.78849


 27%|██▋       | 544/2000 [04:52<13:05,  1.85it/s]

Training_loss 13.78388


 27%|██▋       | 545/2000 [04:53<13:22,  1.81it/s]

Training_loss 13.77957


 27%|██▋       | 546/2000 [04:53<12:26,  1.95it/s]

Training_loss 13.77543


 27%|██▋       | 547/2000 [04:54<10:46,  2.25it/s]

Training_loss 13.76784


 27%|██▋       | 548/2000 [04:54<10:11,  2.37it/s]

Training_loss 13.76530


 27%|██▋       | 549/2000 [04:54<09:47,  2.47it/s]

Training_loss 13.75774


 28%|██▊       | 550/2000 [04:55<11:11,  2.16it/s]

Training_loss 13.75524


 28%|██▊       | 551/2000 [04:55<11:24,  2.12it/s]

Training_loss 13.75230


 28%|██▊       | 552/2000 [04:56<12:01,  2.01it/s]

Training_loss 13.74833


 28%|██▊       | 553/2000 [04:57<12:43,  1.89it/s]

Training_loss 13.74074


 28%|██▊       | 554/2000 [04:57<12:34,  1.92it/s]

Training_loss 13.73436


 28%|██▊       | 555/2000 [04:58<13:10,  1.83it/s]

Training_loss 13.72762


 28%|██▊       | 556/2000 [04:58<13:32,  1.78it/s]

Training_loss 13.72474


 28%|██▊       | 557/2000 [04:59<13:43,  1.75it/s]

Training_loss 13.72857


 28%|██▊       | 558/2000 [04:59<13:36,  1.77it/s]

Training_loss 13.72200


 28%|██▊       | 559/2000 [05:00<13:28,  1.78it/s]

Training_loss 13.72252


 28%|██▊       | 560/2000 [05:00<13:16,  1.81it/s]

Training_loss 13.70513


 28%|██▊       | 561/2000 [05:01<13:10,  1.82it/s]

Training_loss 13.69901


 28%|██▊       | 562/2000 [05:02<13:09,  1.82it/s]

Training_loss 13.69452


 28%|██▊       | 563/2000 [05:02<13:33,  1.77it/s]

Training_loss 13.68989


 28%|██▊       | 564/2000 [05:03<13:23,  1.79it/s]

Training_loss 13.68640


 28%|██▊       | 565/2000 [05:03<13:18,  1.80it/s]

Training_loss 13.68149


 28%|██▊       | 566/2000 [05:04<13:29,  1.77it/s]

Training_loss 13.68085


 28%|██▊       | 567/2000 [05:04<13:00,  1.84it/s]

Training_loss 13.66532


 28%|██▊       | 568/2000 [05:05<13:14,  1.80it/s]

Training_loss 13.65416


 28%|██▊       | 569/2000 [05:05<13:23,  1.78it/s]

Training_loss 13.64902


 28%|██▊       | 570/2000 [05:06<13:33,  1.76it/s]

Training_loss 13.64634


 29%|██▊       | 571/2000 [05:07<13:26,  1.77it/s]

Training_loss 13.63565


 29%|██▊       | 572/2000 [05:07<13:02,  1.83it/s]

Training_loss 13.62988


 29%|██▊       | 573/2000 [05:08<12:58,  1.83it/s]

Training_loss 13.62459


 29%|██▊       | 574/2000 [05:08<11:30,  2.06it/s]

Training_loss 13.61906


 29%|██▉       | 575/2000 [05:09<12:01,  1.97it/s]

Training_loss 13.60927


 29%|██▉       | 576/2000 [05:09<12:02,  1.97it/s]

Training_loss 13.60169


 29%|██▉       | 577/2000 [05:09<10:56,  2.17it/s]

Training_loss 13.59662


 29%|██▉       | 578/2000 [05:10<10:19,  2.30it/s]

Training_loss 13.58932


 29%|██▉       | 579/2000 [05:10<10:01,  2.36it/s]

Training_loss 13.58464


 29%|██▉       | 580/2000 [05:11<10:39,  2.22it/s]

Training_loss 13.57678


 29%|██▉       | 581/2000 [05:11<11:27,  2.06it/s]

Training_loss 13.57319


 29%|██▉       | 582/2000 [05:12<12:19,  1.92it/s]

Training_loss 13.56654


 29%|██▉       | 583/2000 [05:12<12:43,  1.86it/s]

Training_loss 13.56198


 29%|██▉       | 584/2000 [05:13<12:54,  1.83it/s]

Training_loss 13.55990


 29%|██▉       | 585/2000 [05:14<12:54,  1.83it/s]

Training_loss 13.55510


 29%|██▉       | 586/2000 [05:14<13:15,  1.78it/s]

Training_loss 13.55262


 29%|██▉       | 587/2000 [05:15<12:57,  1.82it/s]

Training_loss 13.54803


 29%|██▉       | 588/2000 [05:15<13:05,  1.80it/s]

Training_loss 13.54058


 29%|██▉       | 589/2000 [05:16<13:30,  1.74it/s]

Training_loss 13.53617


 30%|██▉       | 590/2000 [05:17<13:44,  1.71it/s]

Training_loss 13.53062


 30%|██▉       | 591/2000 [05:17<13:01,  1.80it/s]

Training_loss 13.52708


 30%|██▉       | 592/2000 [05:17<11:30,  2.04it/s]

Training_loss 13.52534


 30%|██▉       | 593/2000 [05:18<10:26,  2.25it/s]

Training_loss 13.51571


 30%|██▉       | 594/2000 [05:18<09:53,  2.37it/s]

Training_loss 13.51569


 30%|██▉       | 595/2000 [05:18<09:15,  2.53it/s]

Training_loss 13.50730


 30%|██▉       | 596/2000 [05:19<10:14,  2.29it/s]

Training_loss 13.50057


 30%|██▉       | 597/2000 [05:19<11:08,  2.10it/s]

Training_loss 13.49575


 30%|██▉       | 598/2000 [05:20<11:41,  2.00it/s]

Training_loss 13.48895


 30%|██▉       | 599/2000 [05:21<12:07,  1.93it/s]

Training_loss 13.47790


 30%|███       | 600/2000 [05:21<12:23,  1.88it/s]

Training_loss 13.47149


 30%|███       | 601/2000 [05:22<12:29,  1.87it/s]

Training_loss 13.46455


 30%|███       | 602/2000 [05:22<13:02,  1.79it/s]

Training_loss 13.45340


 30%|███       | 603/2000 [05:23<13:01,  1.79it/s]

Training_loss 13.44417


 30%|███       | 604/2000 [05:24<13:29,  1.73it/s]

Training_loss 13.43899


 30%|███       | 605/2000 [05:24<13:28,  1.73it/s]

Training_loss 13.43271


 30%|███       | 606/2000 [05:25<13:28,  1.72it/s]

Training_loss 13.42541


 30%|███       | 607/2000 [05:25<13:15,  1.75it/s]

Training_loss 13.42010


 30%|███       | 608/2000 [05:26<11:23,  2.04it/s]

Training_loss 13.41317


 30%|███       | 609/2000 [05:26<11:00,  2.11it/s]

Training_loss 13.40727


 30%|███       | 610/2000 [05:26<09:42,  2.39it/s]

Training_loss 13.40340


 31%|███       | 611/2000 [05:27<09:52,  2.35it/s]

Training_loss 13.39853


 31%|███       | 612/2000 [05:27<10:37,  2.18it/s]

Training_loss 13.39168


 31%|███       | 613/2000 [05:28<11:27,  2.02it/s]

Training_loss 13.38500


 31%|███       | 614/2000 [05:28<11:32,  2.00it/s]

Training_loss 13.38210


 31%|███       | 615/2000 [05:29<11:34,  1.99it/s]

Training_loss 13.37474


 31%|███       | 616/2000 [05:29<11:32,  2.00it/s]

Training_loss 13.36483


 31%|███       | 617/2000 [05:30<11:34,  1.99it/s]

Training_loss 13.35815


 31%|███       | 618/2000 [05:30<11:46,  1.96it/s]

Training_loss 13.35163


 31%|███       | 619/2000 [05:31<12:11,  1.89it/s]

Training_loss 13.34578


 31%|███       | 620/2000 [05:31<12:13,  1.88it/s]

Training_loss 13.34459


 31%|███       | 621/2000 [05:32<12:54,  1.78it/s]

Training_loss 13.34410


 31%|███       | 622/2000 [05:33<12:46,  1.80it/s]

Training_loss 13.33748


 31%|███       | 623/2000 [05:33<12:39,  1.81it/s]

Training_loss 13.33055


 31%|███       | 624/2000 [05:34<12:29,  1.84it/s]

Training_loss 13.32272


 31%|███▏      | 625/2000 [05:34<12:53,  1.78it/s]

Training_loss 13.32068


 31%|███▏      | 626/2000 [05:35<13:31,  1.69it/s]

Training_loss 13.31540


 31%|███▏      | 627/2000 [05:36<13:45,  1.66it/s]

Training_loss 13.31170


 31%|███▏      | 628/2000 [05:36<13:53,  1.65it/s]

Training_loss 13.30579


 31%|███▏      | 629/2000 [05:37<13:51,  1.65it/s]

Training_loss 13.29777


 32%|███▏      | 630/2000 [05:37<13:55,  1.64it/s]

Training_loss 13.29110


 32%|███▏      | 631/2000 [05:38<12:59,  1.76it/s]

Training_loss 13.28729


 32%|███▏      | 632/2000 [05:38<13:03,  1.75it/s]

Training_loss 13.28111


 32%|███▏      | 633/2000 [05:39<13:13,  1.72it/s]

Training_loss 13.27295


 32%|███▏      | 634/2000 [05:40<13:02,  1.75it/s]

Training_loss 13.27094


 32%|███▏      | 635/2000 [05:40<13:10,  1.73it/s]

Training_loss 13.26943


 32%|███▏      | 636/2000 [05:41<13:10,  1.73it/s]

Training_loss 13.26374


 32%|███▏      | 637/2000 [05:41<12:11,  1.86it/s]

Training_loss 13.25912


 32%|███▏      | 638/2000 [05:42<10:43,  2.12it/s]

Training_loss 13.25186


 32%|███▏      | 639/2000 [05:42<10:48,  2.10it/s]

Training_loss 13.24351


 32%|███▏      | 640/2000 [05:43<10:55,  2.07it/s]

Training_loss 13.23556


 32%|███▏      | 641/2000 [05:43<10:02,  2.25it/s]

Training_loss 13.23106


 32%|███▏      | 642/2000 [05:43<09:53,  2.29it/s]

Training_loss 13.22934


 32%|███▏      | 643/2000 [05:44<11:08,  2.03it/s]

Training_loss 13.22388


 32%|███▏      | 644/2000 [05:45<11:41,  1.93it/s]

Training_loss 13.21447


 32%|███▏      | 645/2000 [05:45<12:09,  1.86it/s]

Training_loss 13.21012


 32%|███▏      | 646/2000 [05:46<12:41,  1.78it/s]

Training_loss 13.20842


 32%|███▏      | 647/2000 [05:46<13:04,  1.72it/s]

Training_loss 13.20122


 32%|███▏      | 648/2000 [05:47<12:48,  1.76it/s]

Training_loss 13.19673


 32%|███▏      | 649/2000 [05:47<12:48,  1.76it/s]

Training_loss 13.18692


 32%|███▎      | 650/2000 [05:48<13:05,  1.72it/s]

Training_loss 13.18292


 33%|███▎      | 651/2000 [05:49<12:49,  1.75it/s]

Training_loss 13.17497


 33%|███▎      | 652/2000 [05:49<12:39,  1.78it/s]

Training_loss 13.16937


 33%|███▎      | 653/2000 [05:50<12:14,  1.84it/s]

Training_loss 13.16333


 33%|███▎      | 654/2000 [05:50<12:38,  1.78it/s]

Training_loss 13.15536


 33%|███▎      | 655/2000 [05:51<12:42,  1.76it/s]

Training_loss 13.14743


 33%|███▎      | 656/2000 [05:51<12:47,  1.75it/s]

Training_loss 13.14137


 33%|███▎      | 657/2000 [05:52<12:58,  1.72it/s]

Training_loss 13.13451


 33%|███▎      | 658/2000 [05:53<12:51,  1.74it/s]

Training_loss 13.12875


 33%|███▎      | 659/2000 [05:53<12:41,  1.76it/s]

Training_loss 13.12423


 33%|███▎      | 660/2000 [05:54<12:51,  1.74it/s]

Training_loss 13.11960


 33%|███▎      | 661/2000 [05:54<12:35,  1.77it/s]

Training_loss 13.11352


 33%|███▎      | 662/2000 [05:55<12:27,  1.79it/s]

Training_loss 13.11180


 33%|███▎      | 663/2000 [05:55<12:45,  1.75it/s]

Training_loss 13.10640


 33%|███▎      | 664/2000 [05:56<12:48,  1.74it/s]

Training_loss 13.10217


 33%|███▎      | 665/2000 [05:57<12:27,  1.79it/s]

Training_loss 13.09516


 33%|███▎      | 666/2000 [05:57<12:44,  1.74it/s]

Training_loss 13.08873


 33%|███▎      | 667/2000 [05:57<11:10,  1.99it/s]

Training_loss 13.08388


 33%|███▎      | 668/2000 [05:58<10:51,  2.05it/s]

Training_loss 13.07756


 33%|███▎      | 669/2000 [05:59<11:21,  1.95it/s]

Training_loss 13.07055


 34%|███▎      | 670/2000 [05:59<09:55,  2.24it/s]

Training_loss 13.06569


 34%|███▎      | 671/2000 [05:59<09:42,  2.28it/s]

Training_loss 13.06329


 34%|███▎      | 672/2000 [06:00<10:35,  2.09it/s]

Training_loss 13.05586


 34%|███▎      | 673/2000 [06:00<11:07,  1.99it/s]

Training_loss 13.05059


 34%|███▎      | 674/2000 [06:01<11:35,  1.91it/s]

Training_loss 13.04542


 34%|███▍      | 675/2000 [06:02<12:04,  1.83it/s]

Training_loss 13.03967


 34%|███▍      | 676/2000 [06:02<12:28,  1.77it/s]

Training_loss 13.03138


 34%|███▍      | 677/2000 [06:03<12:41,  1.74it/s]

Training_loss 13.02963


 34%|███▍      | 678/2000 [06:03<13:06,  1.68it/s]

Training_loss 13.01862


 34%|███▍      | 679/2000 [06:04<13:11,  1.67it/s]

Training_loss 13.01146


 34%|███▍      | 680/2000 [06:05<13:28,  1.63it/s]

Training_loss 13.00931


 34%|███▍      | 681/2000 [06:05<13:46,  1.60it/s]

Training_loss 13.00055


 34%|███▍      | 682/2000 [06:06<13:13,  1.66it/s]

Training_loss 12.99330


 34%|███▍      | 683/2000 [06:06<13:03,  1.68it/s]

Training_loss 12.98955


 34%|███▍      | 684/2000 [06:07<12:42,  1.73it/s]

Training_loss 12.98618


 34%|███▍      | 685/2000 [06:07<12:15,  1.79it/s]

Training_loss 12.98327


 34%|███▍      | 686/2000 [06:08<12:00,  1.82it/s]

Training_loss 12.97847


 34%|███▍      | 687/2000 [06:09<12:34,  1.74it/s]

Training_loss 12.97231


 34%|███▍      | 688/2000 [06:09<12:08,  1.80it/s]

Training_loss 12.96015


 34%|███▍      | 689/2000 [06:10<12:01,  1.82it/s]

Training_loss 12.95031


 34%|███▍      | 690/2000 [06:10<12:12,  1.79it/s]

Training_loss 12.94256


 35%|███▍      | 691/2000 [06:11<12:24,  1.76it/s]

Training_loss 12.93878


 35%|███▍      | 692/2000 [06:11<11:55,  1.83it/s]

Training_loss 12.93343


 35%|███▍      | 693/2000 [06:12<11:57,  1.82it/s]

Training_loss 12.92495


 35%|███▍      | 694/2000 [06:12<11:59,  1.81it/s]

Training_loss 12.91882


 35%|███▍      | 695/2000 [06:13<12:20,  1.76it/s]

Training_loss 12.91164


 35%|███▍      | 696/2000 [06:13<10:42,  2.03it/s]

Training_loss 12.90619


 35%|███▍      | 697/2000 [06:14<09:42,  2.24it/s]

Training_loss 12.89977


 35%|███▍      | 698/2000 [06:14<09:13,  2.35it/s]

Training_loss 12.89597


 35%|███▍      | 699/2000 [06:15<09:26,  2.30it/s]

Training_loss 12.88837


 35%|███▌      | 700/2000 [06:15<10:20,  2.10it/s]

Training_loss 12.87788


 35%|███▌      | 701/2000 [06:16<10:55,  1.98it/s]

Training_loss 12.87362


 35%|███▌      | 702/2000 [06:16<11:02,  1.96it/s]

Training_loss 12.86841


 35%|███▌      | 703/2000 [06:17<11:32,  1.87it/s]

Training_loss 12.86635


 35%|███▌      | 704/2000 [06:17<11:26,  1.89it/s]

Training_loss 12.86308


 35%|███▌      | 705/2000 [06:18<11:47,  1.83it/s]

Training_loss 12.85803


 35%|███▌      | 706/2000 [06:19<12:05,  1.78it/s]

Training_loss 12.84814


 35%|███▌      | 707/2000 [06:19<11:54,  1.81it/s]

Training_loss 12.84338


 35%|███▌      | 708/2000 [06:20<11:52,  1.81it/s]

Training_loss 12.83670


 35%|███▌      | 709/2000 [06:20<12:03,  1.78it/s]

Training_loss 12.83630


 36%|███▌      | 710/2000 [06:21<12:13,  1.76it/s]

Training_loss 12.82852


 36%|███▌      | 711/2000 [06:21<12:23,  1.73it/s]

Training_loss 12.82217


 36%|███▌      | 712/2000 [06:22<12:21,  1.74it/s]

Training_loss 12.81842


 36%|███▌      | 713/2000 [06:22<10:35,  2.03it/s]

Training_loss 12.81237


 36%|███▌      | 714/2000 [06:23<10:29,  2.04it/s]

Training_loss 12.80500


 36%|███▌      | 715/2000 [06:23<09:46,  2.19it/s]

Training_loss 12.80044


 36%|███▌      | 716/2000 [06:23<09:15,  2.31it/s]

Training_loss 12.79829


 36%|███▌      | 717/2000 [06:24<10:02,  2.13it/s]

Training_loss 12.78955


 36%|███▌      | 718/2000 [06:24<09:38,  2.22it/s]

Training_loss 12.78604


 36%|███▌      | 719/2000 [06:25<08:44,  2.44it/s]

Training_loss 12.77886


 36%|███▌      | 720/2000 [06:25<07:35,  2.81it/s]

Training_loss 12.77277


 36%|███▌      | 721/2000 [06:25<06:53,  3.09it/s]

Training_loss 12.77087


 36%|███▌      | 722/2000 [06:25<06:35,  3.23it/s]

Training_loss 12.76571


 36%|███▌      | 723/2000 [06:26<06:18,  3.38it/s]

Training_loss 12.75946


 36%|███▌      | 724/2000 [06:26<07:33,  2.81it/s]

Training_loss 12.75463


 36%|███▋      | 725/2000 [06:27<07:16,  2.92it/s]

Training_loss 12.75206


 36%|███▋      | 726/2000 [06:27<07:50,  2.71it/s]

Training_loss 12.74568


 36%|███▋      | 727/2000 [06:27<07:23,  2.87it/s]

Training_loss 12.74080


 36%|███▋      | 728/2000 [06:28<06:53,  3.08it/s]

Training_loss 12.73710


 36%|███▋      | 729/2000 [06:28<06:37,  3.20it/s]

Training_loss 12.73296


 36%|███▋      | 730/2000 [06:28<06:25,  3.29it/s]

Training_loss 12.73115


 37%|███▋      | 731/2000 [06:29<07:23,  2.86it/s]

Training_loss 12.72405


 37%|███▋      | 732/2000 [06:29<07:43,  2.73it/s]

Training_loss 12.72013


 37%|███▋      | 733/2000 [06:30<08:39,  2.44it/s]

Training_loss 12.71631


 37%|███▋      | 734/2000 [06:30<09:14,  2.28it/s]

Training_loss 12.71011


 37%|███▋      | 735/2000 [06:31<09:36,  2.20it/s]

Training_loss 12.70811


 37%|███▋      | 736/2000 [06:31<10:06,  2.08it/s]

Training_loss 12.70377


 37%|███▋      | 737/2000 [06:31<09:50,  2.14it/s]

Training_loss 12.69302


 37%|███▋      | 738/2000 [06:32<11:01,  1.91it/s]

Training_loss 12.68720


 37%|███▋      | 739/2000 [06:33<11:49,  1.78it/s]

Training_loss 12.68198


 37%|███▋      | 740/2000 [06:33<12:15,  1.71it/s]

Training_loss 12.67568


 37%|███▋      | 741/2000 [06:34<12:42,  1.65it/s]

Training_loss 12.66697


 37%|███▋      | 742/2000 [06:35<12:39,  1.66it/s]

Training_loss 12.66074


 37%|███▋      | 743/2000 [06:35<13:01,  1.61it/s]

Training_loss 12.65318


 37%|███▋      | 744/2000 [06:36<12:55,  1.62it/s]

Training_loss 12.65049


 37%|███▋      | 745/2000 [06:37<13:00,  1.61it/s]

Training_loss 12.64536


 37%|███▋      | 746/2000 [06:37<13:01,  1.60it/s]

Training_loss 12.63950


 37%|███▋      | 747/2000 [06:38<12:15,  1.70it/s]

Training_loss 12.63370


 37%|███▋      | 748/2000 [06:38<10:37,  1.96it/s]

Training_loss 12.62963


 37%|███▋      | 749/2000 [06:38<09:14,  2.26it/s]

Training_loss 12.62479


 38%|███▊      | 750/2000 [06:39<08:20,  2.50it/s]

Training_loss 12.62063


 38%|███▊      | 751/2000 [06:39<07:38,  2.72it/s]

Training_loss 12.61724


 38%|███▊      | 752/2000 [06:39<07:10,  2.90it/s]

Training_loss 12.61373


 38%|███▊      | 753/2000 [06:40<06:53,  3.02it/s]

Training_loss 12.61244


 38%|███▊      | 754/2000 [06:40<06:40,  3.11it/s]

Training_loss 12.60207


 38%|███▊      | 755/2000 [06:40<06:34,  3.16it/s]

Training_loss 12.59098


 38%|███▊      | 756/2000 [06:40<06:29,  3.19it/s]

Training_loss 12.58571


 38%|███▊      | 757/2000 [06:41<06:28,  3.20it/s]

Training_loss 12.58054


 38%|███▊      | 758/2000 [06:41<06:31,  3.18it/s]

Training_loss 12.57568


 38%|███▊      | 759/2000 [06:41<06:24,  3.22it/s]

Training_loss 12.56868


 38%|███▊      | 760/2000 [06:42<06:23,  3.24it/s]

Training_loss 12.56341


 38%|███▊      | 761/2000 [06:42<06:11,  3.34it/s]

Training_loss 12.55637


 38%|███▊      | 762/2000 [06:42<06:06,  3.38it/s]

Training_loss 12.55082


 38%|███▊      | 763/2000 [06:43<06:12,  3.32it/s]

Training_loss 12.54279


 38%|███▊      | 764/2000 [06:43<06:27,  3.19it/s]

Training_loss 12.53911


 38%|███▊      | 765/2000 [06:43<06:17,  3.27it/s]

Training_loss 12.53168


 38%|███▊      | 766/2000 [06:43<06:22,  3.23it/s]

Training_loss 12.52799


 38%|███▊      | 767/2000 [06:44<08:04,  2.55it/s]

Training_loss 12.52125


 38%|███▊      | 768/2000 [06:45<09:12,  2.23it/s]

Training_loss 12.51058


 38%|███▊      | 769/2000 [06:45<09:50,  2.09it/s]

Training_loss 12.50141


 38%|███▊      | 770/2000 [06:46<09:17,  2.21it/s]

Training_loss 12.49567


 39%|███▊      | 771/2000 [06:46<10:23,  1.97it/s]

Training_loss 12.48771


 39%|███▊      | 772/2000 [06:47<09:47,  2.09it/s]

Training_loss 12.48113


 39%|███▊      | 773/2000 [06:47<10:26,  1.96it/s]

Training_loss 12.47641


 39%|███▊      | 774/2000 [06:48<11:04,  1.85it/s]

Training_loss 12.46782


 39%|███▉      | 775/2000 [06:49<11:59,  1.70it/s]

Training_loss 12.46231


 39%|███▉      | 776/2000 [06:49<12:16,  1.66it/s]

Training_loss 12.45502


 39%|███▉      | 777/2000 [06:50<12:43,  1.60it/s]

Training_loss 12.45257


 39%|███▉      | 778/2000 [06:50<12:31,  1.63it/s]

Training_loss 12.44448


 39%|███▉      | 779/2000 [06:51<12:02,  1.69it/s]

Training_loss 12.43780


 39%|███▉      | 780/2000 [06:52<12:17,  1.65it/s]

Training_loss 12.43429


 39%|███▉      | 781/2000 [06:52<12:13,  1.66it/s]

Training_loss 12.42929


 39%|███▉      | 782/2000 [06:53<11:51,  1.71it/s]

Training_loss 12.42298


 39%|███▉      | 783/2000 [06:53<11:50,  1.71it/s]

Training_loss 12.41784


 39%|███▉      | 784/2000 [06:54<12:28,  1.62it/s]

Training_loss 12.41064


 39%|███▉      | 785/2000 [06:55<12:49,  1.58it/s]

Training_loss 12.40583


 39%|███▉      | 786/2000 [06:55<12:41,  1.59it/s]

Training_loss 12.40055


 39%|███▉      | 787/2000 [06:56<12:41,  1.59it/s]

Training_loss 12.39036


 39%|███▉      | 788/2000 [06:57<12:32,  1.61it/s]

Training_loss 12.38525


 39%|███▉      | 789/2000 [06:57<12:35,  1.60it/s]

Training_loss 12.37735


 40%|███▉      | 790/2000 [06:58<12:43,  1.58it/s]

Training_loss 12.37191


 40%|███▉      | 791/2000 [06:58<12:49,  1.57it/s]

Training_loss 12.36783


 40%|███▉      | 792/2000 [06:59<12:39,  1.59it/s]

Training_loss 12.35855


 40%|███▉      | 793/2000 [07:00<12:30,  1.61it/s]

Training_loss 12.35528


 40%|███▉      | 794/2000 [07:00<12:34,  1.60it/s]

Training_loss 12.34866


 40%|███▉      | 795/2000 [07:01<12:37,  1.59it/s]

Training_loss 12.33980


 40%|███▉      | 796/2000 [07:01<11:22,  1.77it/s]

Training_loss 12.33352


 40%|███▉      | 797/2000 [07:02<09:57,  2.01it/s]

Training_loss 12.32951


 40%|███▉      | 798/2000 [07:02<09:04,  2.21it/s]

Training_loss 12.32736


 40%|███▉      | 799/2000 [07:02<08:21,  2.40it/s]

Training_loss 12.32153


 40%|████      | 800/2000 [07:03<09:29,  2.11it/s]

Training_loss 12.31566


 40%|████      | 801/2000 [07:04<10:22,  1.93it/s]

Training_loss 12.30851


 40%|████      | 802/2000 [07:04<10:56,  1.82it/s]

Training_loss 12.30291


 40%|████      | 803/2000 [07:05<11:21,  1.76it/s]

Training_loss 12.29453


 40%|████      | 804/2000 [07:05<11:28,  1.74it/s]

Training_loss 12.28894


 40%|████      | 805/2000 [07:06<11:34,  1.72it/s]

Training_loss 12.28050


 40%|████      | 806/2000 [07:07<11:48,  1.69it/s]

Training_loss 12.27688


 40%|████      | 807/2000 [07:07<11:47,  1.69it/s]

Training_loss 12.27035


 40%|████      | 808/2000 [07:08<11:41,  1.70it/s]

Training_loss 12.26345


 40%|████      | 809/2000 [07:08<11:48,  1.68it/s]

Training_loss 12.26157


 40%|████      | 810/2000 [07:09<12:02,  1.65it/s]

Training_loss 12.25679


 41%|████      | 811/2000 [07:10<12:16,  1.61it/s]

Training_loss 12.25047


 41%|████      | 812/2000 [07:10<12:33,  1.58it/s]

Training_loss 12.24226


 41%|████      | 813/2000 [07:11<12:27,  1.59it/s]

Training_loss 12.23458


 41%|████      | 814/2000 [07:12<12:51,  1.54it/s]

Training_loss 12.22708


 41%|████      | 815/2000 [07:12<13:07,  1.51it/s]

Training_loss 12.22211


 41%|████      | 816/2000 [07:13<13:09,  1.50it/s]

Training_loss 12.21342


 41%|████      | 817/2000 [07:14<12:54,  1.53it/s]

Training_loss 12.20578


 41%|████      | 818/2000 [07:14<13:08,  1.50it/s]

Training_loss 12.20056


 41%|████      | 819/2000 [07:15<13:00,  1.51it/s]

Training_loss 12.19520


 41%|████      | 820/2000 [07:16<12:56,  1.52it/s]

Training_loss 12.18753


 41%|████      | 821/2000 [07:16<13:06,  1.50it/s]

Training_loss 12.18363


 41%|████      | 822/2000 [07:17<12:54,  1.52it/s]

Training_loss 12.17347


 41%|████      | 823/2000 [07:17<11:25,  1.72it/s]

Training_loss 12.16657


 41%|████      | 824/2000 [07:18<10:31,  1.86it/s]

Training_loss 12.16314


 41%|████▏     | 825/2000 [07:18<09:52,  1.98it/s]

Training_loss 12.15521


 41%|████▏     | 826/2000 [07:19<09:38,  2.03it/s]

Training_loss 12.15097


 41%|████▏     | 827/2000 [07:19<10:25,  1.88it/s]

Training_loss 12.14694


 41%|████▏     | 828/2000 [07:20<10:55,  1.79it/s]

Training_loss 12.13958


 41%|████▏     | 829/2000 [07:21<11:17,  1.73it/s]

Training_loss 12.13046


 42%|████▏     | 830/2000 [07:21<11:55,  1.64it/s]

Training_loss 12.12469


 42%|████▏     | 831/2000 [07:22<12:00,  1.62it/s]

Training_loss 12.11785


 42%|████▏     | 832/2000 [07:23<12:02,  1.62it/s]

Training_loss 12.11094


 42%|████▏     | 833/2000 [07:23<11:51,  1.64it/s]

Training_loss 12.10483


 42%|████▏     | 834/2000 [07:24<11:38,  1.67it/s]

Training_loss 12.10053


 42%|████▏     | 835/2000 [07:24<11:35,  1.68it/s]

Training_loss 12.09463


 42%|████▏     | 836/2000 [07:25<11:36,  1.67it/s]

Training_loss 12.08581


 42%|████▏     | 837/2000 [07:26<11:36,  1.67it/s]

Training_loss 12.07991


 42%|████▏     | 838/2000 [07:26<11:48,  1.64it/s]

Training_loss 12.07305


 42%|████▏     | 839/2000 [07:27<11:53,  1.63it/s]

Training_loss 12.06542


 42%|████▏     | 840/2000 [07:27<11:44,  1.65it/s]

Training_loss 12.05940


 42%|████▏     | 841/2000 [07:28<11:16,  1.71it/s]

Training_loss 12.05353


 42%|████▏     | 842/2000 [07:28<10:57,  1.76it/s]

Training_loss 12.04734


 42%|████▏     | 843/2000 [07:29<10:59,  1.75it/s]

Training_loss 12.04310


 42%|████▏     | 844/2000 [07:30<10:48,  1.78it/s]

Training_loss 12.03491


 42%|████▏     | 845/2000 [07:30<10:54,  1.77it/s]

Training_loss 12.02815


 42%|████▏     | 846/2000 [07:31<11:05,  1.73it/s]

Training_loss 12.01918


 42%|████▏     | 847/2000 [07:31<10:57,  1.75it/s]

Training_loss 12.01136


 42%|████▏     | 848/2000 [07:32<10:35,  1.81it/s]

Training_loss 12.00434


 42%|████▏     | 849/2000 [07:32<10:20,  1.86it/s]

Training_loss 12.00060


 42%|████▎     | 850/2000 [07:33<10:23,  1.84it/s]

Training_loss 11.99501


 43%|████▎     | 851/2000 [07:33<10:01,  1.91it/s]

Training_loss 11.98858


 43%|████▎     | 852/2000 [07:34<10:13,  1.87it/s]

Training_loss 11.98383


 43%|████▎     | 853/2000 [07:34<09:51,  1.94it/s]

Training_loss 11.97736


 43%|████▎     | 854/2000 [07:35<09:31,  2.00it/s]

Training_loss 11.97052


 43%|████▎     | 855/2000 [07:35<09:38,  1.98it/s]

Training_loss 11.96634


 43%|████▎     | 856/2000 [07:36<10:08,  1.88it/s]

Training_loss 11.95979


 43%|████▎     | 857/2000 [07:37<10:41,  1.78it/s]

Training_loss 11.94931


 43%|████▎     | 858/2000 [07:37<10:48,  1.76it/s]

Training_loss 11.94741


 43%|████▎     | 859/2000 [07:38<10:54,  1.74it/s]

Training_loss 11.93757


 43%|████▎     | 860/2000 [07:38<10:49,  1.75it/s]

Training_loss 11.92955


 43%|████▎     | 861/2000 [07:39<10:49,  1.75it/s]

Training_loss 11.92235


 43%|████▎     | 862/2000 [07:39<10:35,  1.79it/s]

Training_loss 11.91394


 43%|████▎     | 863/2000 [07:40<10:10,  1.86it/s]

Training_loss 11.90733


 43%|████▎     | 864/2000 [07:40<10:16,  1.84it/s]

Training_loss 11.89822


 43%|████▎     | 865/2000 [07:41<10:20,  1.83it/s]

Training_loss 11.88987


 43%|████▎     | 866/2000 [07:41<08:51,  2.13it/s]

Training_loss 11.88214


 43%|████▎     | 867/2000 [07:42<08:47,  2.15it/s]

Training_loss 11.87673


 43%|████▎     | 868/2000 [07:42<09:17,  2.03it/s]

Training_loss 11.86908


 43%|████▎     | 869/2000 [07:43<09:20,  2.02it/s]

Training_loss 11.86541


 44%|████▎     | 870/2000 [07:43<09:24,  2.00it/s]

Training_loss 11.85805


 44%|████▎     | 871/2000 [07:44<09:40,  1.94it/s]

Training_loss 11.85211


 44%|████▎     | 872/2000 [07:44<09:54,  1.90it/s]

Training_loss 11.84414


 44%|████▎     | 873/2000 [07:45<09:50,  1.91it/s]

Training_loss 11.83844


 44%|████▎     | 874/2000 [07:46<10:05,  1.86it/s]

Training_loss 11.83423


 44%|████▍     | 875/2000 [07:46<09:55,  1.89it/s]

Training_loss 11.82643


 44%|████▍     | 876/2000 [07:47<10:02,  1.87it/s]

Training_loss 11.81755


 44%|████▍     | 877/2000 [07:47<09:59,  1.87it/s]

Training_loss 11.81440


 44%|████▍     | 878/2000 [07:48<09:44,  1.92it/s]

Training_loss 11.80931


 44%|████▍     | 879/2000 [07:48<09:41,  1.93it/s]

Training_loss 11.80335


 44%|████▍     | 880/2000 [07:49<09:43,  1.92it/s]

Training_loss 11.79646


 44%|████▍     | 881/2000 [07:49<09:47,  1.90it/s]

Training_loss 11.78630


 44%|████▍     | 882/2000 [07:50<08:45,  2.13it/s]

Training_loss 11.77979


 44%|████▍     | 883/2000 [07:50<08:22,  2.22it/s]

Training_loss 11.76900


 44%|████▍     | 884/2000 [07:50<08:00,  2.32it/s]

Training_loss 11.76389


 44%|████▍     | 885/2000 [07:51<08:36,  2.16it/s]

Training_loss 11.75725


 44%|████▍     | 886/2000 [07:51<09:08,  2.03it/s]

Training_loss 11.74858


 44%|████▍     | 887/2000 [07:52<09:15,  2.00it/s]

Training_loss 11.74151


 44%|████▍     | 888/2000 [07:52<09:22,  1.98it/s]

Training_loss 11.73504


 44%|████▍     | 889/2000 [07:53<09:28,  1.96it/s]

Training_loss 11.72868


 44%|████▍     | 890/2000 [07:53<09:32,  1.94it/s]

Training_loss 11.71872


 45%|████▍     | 891/2000 [07:54<09:31,  1.94it/s]

Training_loss 11.71332


 45%|████▍     | 892/2000 [07:55<09:25,  1.96it/s]

Training_loss 11.70509


 45%|████▍     | 893/2000 [07:55<09:50,  1.88it/s]

Training_loss 11.70081


 45%|████▍     | 894/2000 [07:56<09:53,  1.86it/s]

Training_loss 11.69611


 45%|████▍     | 895/2000 [07:56<09:55,  1.85it/s]

Training_loss 11.68731


 45%|████▍     | 896/2000 [07:57<10:05,  1.82it/s]

Training_loss 11.67996


 45%|████▍     | 897/2000 [07:57<09:58,  1.84it/s]

Training_loss 11.67427


 45%|████▍     | 898/2000 [07:58<09:42,  1.89it/s]

Training_loss 11.66421


 45%|████▍     | 899/2000 [07:58<09:50,  1.86it/s]

Training_loss 11.65714


 45%|████▌     | 900/2000 [07:59<09:43,  1.88it/s]

Training_loss 11.65104


 45%|████▌     | 901/2000 [07:59<09:39,  1.90it/s]

Training_loss 11.64552


 45%|████▌     | 902/2000 [08:00<09:44,  1.88it/s]

Training_loss 11.64129


 45%|████▌     | 903/2000 [08:00<09:40,  1.89it/s]

Training_loss 11.63317


 45%|████▌     | 904/2000 [08:01<09:33,  1.91it/s]

Training_loss 11.62599


 45%|████▌     | 905/2000 [08:01<09:26,  1.93it/s]

Training_loss 11.62172


 45%|████▌     | 906/2000 [08:02<09:17,  1.96it/s]

Training_loss 11.61893


 45%|████▌     | 907/2000 [08:02<09:27,  1.93it/s]

Training_loss 11.61432


 45%|████▌     | 908/2000 [08:03<09:31,  1.91it/s]

Training_loss 11.60849


 45%|████▌     | 909/2000 [08:04<09:55,  1.83it/s]

Training_loss 11.60237


 46%|████▌     | 910/2000 [08:04<09:39,  1.88it/s]

Training_loss 11.59655


 46%|████▌     | 911/2000 [08:05<09:34,  1.89it/s]

Training_loss 11.58588


 46%|████▌     | 912/2000 [08:05<09:25,  1.93it/s]

Training_loss 11.57677


 46%|████▌     | 913/2000 [08:05<08:28,  2.14it/s]

Training_loss 11.56582


 46%|████▌     | 914/2000 [08:06<08:45,  2.07it/s]

Training_loss 11.55861


 46%|████▌     | 915/2000 [08:06<08:11,  2.21it/s]

Training_loss 11.55445


 46%|████▌     | 916/2000 [08:07<08:30,  2.12it/s]

Training_loss 11.54985


 46%|████▌     | 917/2000 [08:07<08:58,  2.01it/s]

Training_loss 11.54001


 46%|████▌     | 918/2000 [08:08<09:06,  1.98it/s]

Training_loss 11.53168


 46%|████▌     | 919/2000 [08:09<09:38,  1.87it/s]

Training_loss 11.52575


 46%|████▌     | 920/2000 [08:09<09:42,  1.85it/s]

Training_loss 11.51746


 46%|████▌     | 921/2000 [08:10<09:32,  1.88it/s]

Training_loss 11.51329


 46%|████▌     | 922/2000 [08:10<09:30,  1.89it/s]

Training_loss 11.50897


 46%|████▌     | 923/2000 [08:11<09:33,  1.88it/s]

Training_loss 11.50149


 46%|████▌     | 924/2000 [08:11<09:52,  1.82it/s]

Training_loss 11.49322


 46%|████▋     | 925/2000 [08:12<09:54,  1.81it/s]

Training_loss 11.48382


 46%|████▋     | 926/2000 [08:12<09:38,  1.85it/s]

Training_loss 11.47627


 46%|████▋     | 927/2000 [08:13<09:41,  1.84it/s]

Training_loss 11.47027


 46%|████▋     | 928/2000 [08:13<09:37,  1.86it/s]

Training_loss 11.46453


 46%|████▋     | 929/2000 [08:14<09:35,  1.86it/s]

Training_loss 11.45643


 46%|████▋     | 930/2000 [08:15<09:47,  1.82it/s]

Training_loss 11.45222


 47%|████▋     | 931/2000 [08:15<09:52,  1.80it/s]

Training_loss 11.44583


 47%|████▋     | 932/2000 [08:16<10:32,  1.69it/s]

Training_loss 11.43955


 47%|████▋     | 933/2000 [08:16<10:14,  1.74it/s]

Training_loss 11.43133


 47%|████▋     | 934/2000 [08:17<10:02,  1.77it/s]

Training_loss 11.42535


 47%|████▋     | 935/2000 [08:17<09:58,  1.78it/s]

Training_loss 11.41771


 47%|████▋     | 936/2000 [08:18<10:15,  1.73it/s]

Training_loss 11.41228


 47%|████▋     | 937/2000 [08:19<10:21,  1.71it/s]

Training_loss 11.40742


 47%|████▋     | 938/2000 [08:19<10:40,  1.66it/s]

Training_loss 11.40174


 47%|████▋     | 939/2000 [08:20<10:46,  1.64it/s]

Training_loss 11.39449


 47%|████▋     | 940/2000 [08:20<10:23,  1.70it/s]

Training_loss 11.39152


 47%|████▋     | 941/2000 [08:21<10:08,  1.74it/s]

Training_loss 11.38090


 47%|████▋     | 942/2000 [08:22<09:50,  1.79it/s]

Training_loss 11.37529


 47%|████▋     | 943/2000 [08:22<08:35,  2.05it/s]

Training_loss 11.36651


 47%|████▋     | 944/2000 [08:22<08:56,  1.97it/s]

Training_loss 11.36035


 47%|████▋     | 945/2000 [08:23<07:54,  2.22it/s]

Training_loss 11.35225


 47%|████▋     | 946/2000 [08:23<07:53,  2.23it/s]

Training_loss 11.34619


 47%|████▋     | 947/2000 [08:24<08:20,  2.10it/s]

Training_loss 11.34196


 47%|████▋     | 948/2000 [08:24<08:27,  2.07it/s]

Training_loss 11.33761


 47%|████▋     | 949/2000 [08:25<09:00,  1.95it/s]

Training_loss 11.32461


 48%|████▊     | 950/2000 [08:25<08:59,  1.95it/s]

Training_loss 11.31880


 48%|████▊     | 951/2000 [08:26<08:57,  1.95it/s]

Training_loss 11.30997


 48%|████▊     | 952/2000 [08:26<09:26,  1.85it/s]

Training_loss 11.29951


 48%|████▊     | 953/2000 [08:27<09:38,  1.81it/s]

Training_loss 11.29308


 48%|████▊     | 954/2000 [08:28<09:39,  1.80it/s]

Training_loss 11.28704


 48%|████▊     | 955/2000 [08:28<09:59,  1.74it/s]

Training_loss 11.28154


 48%|████▊     | 956/2000 [08:29<09:54,  1.75it/s]

Training_loss 11.27480


 48%|████▊     | 957/2000 [08:29<10:26,  1.66it/s]

Training_loss 11.27101


 48%|████▊     | 958/2000 [08:30<10:26,  1.66it/s]

Training_loss 11.26450


 48%|████▊     | 959/2000 [08:31<10:14,  1.70it/s]

Training_loss 11.25701


 48%|████▊     | 960/2000 [08:31<10:12,  1.70it/s]

Training_loss 11.25106


 48%|████▊     | 961/2000 [08:32<10:22,  1.67it/s]

Training_loss 11.24084


 48%|████▊     | 962/2000 [08:32<10:15,  1.69it/s]

Training_loss 11.23461


 48%|████▊     | 963/2000 [08:33<10:25,  1.66it/s]

Training_loss 11.22450


 48%|████▊     | 964/2000 [08:34<10:32,  1.64it/s]

Training_loss 11.21276


 48%|████▊     | 965/2000 [08:34<10:31,  1.64it/s]

Training_loss 11.20619


 48%|████▊     | 966/2000 [08:35<10:18,  1.67it/s]

Training_loss 11.20217


 48%|████▊     | 967/2000 [08:35<10:19,  1.67it/s]

Training_loss 11.19423


 48%|████▊     | 968/2000 [08:36<10:23,  1.65it/s]

Training_loss 11.18622


 48%|████▊     | 969/2000 [08:37<10:33,  1.63it/s]

Training_loss 11.18170


 48%|████▊     | 970/2000 [08:37<10:16,  1.67it/s]

Training_loss 11.17432


 49%|████▊     | 971/2000 [08:38<09:10,  1.87it/s]

Training_loss 11.16375


 49%|████▊     | 972/2000 [08:38<09:30,  1.80it/s]

Training_loss 11.16080


 49%|████▊     | 973/2000 [08:39<09:07,  1.88it/s]

Training_loss 11.15495


 49%|████▊     | 974/2000 [08:39<09:00,  1.90it/s]

Training_loss 11.14874


 49%|████▉     | 975/2000 [08:40<08:49,  1.94it/s]

Training_loss 11.14236


 49%|████▉     | 976/2000 [08:40<09:23,  1.82it/s]

Training_loss 11.13567


 49%|████▉     | 977/2000 [08:41<09:30,  1.79it/s]

Training_loss 11.13035


 49%|████▉     | 978/2000 [08:42<09:47,  1.74it/s]

Training_loss 11.11856


 49%|████▉     | 979/2000 [08:42<09:54,  1.72it/s]

Training_loss 11.10959


 49%|████▉     | 980/2000 [08:43<10:10,  1.67it/s]

Training_loss 11.10421


 49%|████▉     | 981/2000 [08:43<10:10,  1.67it/s]

Training_loss 11.09646


 49%|████▉     | 982/2000 [08:44<09:46,  1.73it/s]

Training_loss 11.08831


 49%|████▉     | 983/2000 [08:44<09:28,  1.79it/s]

Training_loss 11.08357


 49%|████▉     | 984/2000 [08:45<09:05,  1.86it/s]

Training_loss 11.07883


 49%|████▉     | 985/2000 [08:45<09:16,  1.82it/s]

Training_loss 11.06979


 49%|████▉     | 986/2000 [08:46<09:42,  1.74it/s]

Training_loss 11.06110


 49%|████▉     | 987/2000 [08:47<10:06,  1.67it/s]

Training_loss 11.05245


 49%|████▉     | 988/2000 [08:47<09:58,  1.69it/s]

Training_loss 11.04523


 49%|████▉     | 989/2000 [08:48<09:47,  1.72it/s]

Training_loss 11.03440


 50%|████▉     | 990/2000 [08:48<09:23,  1.79it/s]

Training_loss 11.02657


 50%|████▉     | 991/2000 [08:49<09:35,  1.75it/s]

Training_loss 11.02121


 50%|████▉     | 992/2000 [08:50<09:57,  1.69it/s]

Training_loss 11.01270


 50%|████▉     | 993/2000 [08:50<10:00,  1.68it/s]

Training_loss 11.00773


 50%|████▉     | 994/2000 [08:51<10:26,  1.61it/s]

Training_loss 11.00264


 50%|████▉     | 995/2000 [08:52<10:50,  1.55it/s]

Training_loss 10.99703


 50%|████▉     | 996/2000 [08:52<10:44,  1.56it/s]

Training_loss 10.99211


 50%|████▉     | 997/2000 [08:53<10:45,  1.55it/s]

Training_loss 10.98532


 50%|████▉     | 998/2000 [08:53<09:51,  1.69it/s]

Training_loss 10.97508


 50%|████▉     | 999/2000 [08:54<09:11,  1.82it/s]

Training_loss 10.97082


 50%|█████     | 1000/2000 [08:54<08:44,  1.91it/s]

Training_loss 10.96657


 50%|█████     | 1001/2000 [08:55<07:39,  2.18it/s]

Training_loss 10.95770


 50%|█████     | 1002/2000 [08:55<07:58,  2.09it/s]

Training_loss 10.95039


 50%|█████     | 1003/2000 [08:56<08:38,  1.92it/s]

Training_loss 10.94299


 50%|█████     | 1004/2000 [08:56<09:05,  1.82it/s]

Training_loss 10.93437


 50%|█████     | 1005/2000 [08:57<09:28,  1.75it/s]

Training_loss 10.92595


 50%|█████     | 1006/2000 [08:58<09:46,  1.70it/s]

Training_loss 10.91577


 50%|█████     | 1007/2000 [08:58<10:05,  1.64it/s]

Training_loss 10.91114


 50%|█████     | 1008/2000 [08:59<10:46,  1.54it/s]

Training_loss 10.90629


 50%|█████     | 1009/2000 [09:00<10:33,  1.56it/s]

Training_loss 10.90436


 50%|█████     | 1010/2000 [09:00<10:22,  1.59it/s]

Training_loss 10.89452


 51%|█████     | 1011/2000 [09:01<10:03,  1.64it/s]

Training_loss 10.88220


 51%|█████     | 1012/2000 [09:01<10:07,  1.63it/s]

Training_loss 10.87199


 51%|█████     | 1013/2000 [09:02<09:49,  1.67it/s]

Training_loss 10.86224


 51%|█████     | 1014/2000 [09:03<09:55,  1.65it/s]

Training_loss 10.85441


 51%|█████     | 1015/2000 [09:03<10:02,  1.63it/s]

Training_loss 10.84767


 51%|█████     | 1016/2000 [09:04<10:08,  1.62it/s]

Training_loss 10.84213


 51%|█████     | 1017/2000 [09:05<10:31,  1.56it/s]

Training_loss 10.83237


 51%|█████     | 1018/2000 [09:05<10:43,  1.53it/s]

Training_loss 10.82285


 51%|█████     | 1019/2000 [09:06<10:48,  1.51it/s]

Training_loss 10.81560


 51%|█████     | 1020/2000 [09:06<10:12,  1.60it/s]

Training_loss 10.81078


 51%|█████     | 1021/2000 [09:07<10:25,  1.57it/s]

Training_loss 10.80306


 51%|█████     | 1022/2000 [09:08<10:41,  1.53it/s]

Training_loss 10.79191


 51%|█████     | 1023/2000 [09:08<10:31,  1.55it/s]

Training_loss 10.78618


 51%|█████     | 1024/2000 [09:09<10:50,  1.50it/s]

Training_loss 10.77847


 51%|█████▏    | 1025/2000 [09:10<09:41,  1.68it/s]

Training_loss 10.77268


 51%|█████▏    | 1026/2000 [09:10<08:54,  1.82it/s]

Training_loss 10.76632


 51%|█████▏    | 1027/2000 [09:11<08:44,  1.86it/s]

Training_loss 10.75774


 51%|█████▏    | 1028/2000 [09:11<09:19,  1.74it/s]

Training_loss 10.75045


 51%|█████▏    | 1029/2000 [09:12<10:04,  1.61it/s]

Training_loss 10.74085


 52%|█████▏    | 1030/2000 [09:13<10:10,  1.59it/s]

Training_loss 10.73538


 52%|█████▏    | 1031/2000 [09:13<10:20,  1.56it/s]

Training_loss 10.72444


 52%|█████▏    | 1032/2000 [09:14<10:33,  1.53it/s]

Training_loss 10.71094


 52%|█████▏    | 1033/2000 [09:15<10:19,  1.56it/s]

Training_loss 10.70315


 52%|█████▏    | 1034/2000 [09:15<09:57,  1.62it/s]

Training_loss 10.69326


 52%|█████▏    | 1035/2000 [09:16<09:31,  1.69it/s]

Training_loss 10.68498


 52%|█████▏    | 1036/2000 [09:16<09:18,  1.73it/s]

Training_loss 10.67975


 52%|█████▏    | 1037/2000 [09:17<08:57,  1.79it/s]

Training_loss 10.67234


 52%|█████▏    | 1038/2000 [09:17<09:08,  1.75it/s]

Training_loss 10.66769


 52%|█████▏    | 1039/2000 [09:18<09:18,  1.72it/s]

Training_loss 10.66279


 52%|█████▏    | 1040/2000 [09:18<09:17,  1.72it/s]

Training_loss 10.65431


 52%|█████▏    | 1041/2000 [09:19<09:22,  1.71it/s]

Training_loss 10.64906


 52%|█████▏    | 1042/2000 [09:20<09:03,  1.76it/s]

Training_loss 10.64099


 52%|█████▏    | 1043/2000 [09:20<08:57,  1.78it/s]

Training_loss 10.63380


 52%|█████▏    | 1044/2000 [09:21<09:17,  1.71it/s]

Training_loss 10.63069


 52%|█████▏    | 1045/2000 [09:21<09:15,  1.72it/s]

Training_loss 10.62464


 52%|█████▏    | 1046/2000 [09:22<09:11,  1.73it/s]

Training_loss 10.61984


 52%|█████▏    | 1047/2000 [09:22<08:46,  1.81it/s]

Training_loss 10.61525


 52%|█████▏    | 1048/2000 [09:23<08:56,  1.78it/s]

Training_loss 10.60613


 52%|█████▏    | 1049/2000 [09:24<09:21,  1.70it/s]

Training_loss 10.59829


 52%|█████▎    | 1050/2000 [09:24<09:19,  1.70it/s]

Training_loss 10.58562


 53%|█████▎    | 1051/2000 [09:25<09:02,  1.75it/s]

Training_loss 10.57443


 53%|█████▎    | 1052/2000 [09:25<09:09,  1.72it/s]

Training_loss 10.56587


 53%|█████▎    | 1053/2000 [09:26<08:26,  1.87it/s]

Training_loss 10.56020


 53%|█████▎    | 1054/2000 [09:26<08:00,  1.97it/s]

Training_loss 10.55405


 53%|█████▎    | 1055/2000 [09:27<08:16,  1.90it/s]

Training_loss 10.54617


 53%|█████▎    | 1056/2000 [09:27<08:34,  1.83it/s]

Training_loss 10.53693


 53%|█████▎    | 1057/2000 [09:28<08:34,  1.83it/s]

Training_loss 10.52641


 53%|█████▎    | 1058/2000 [09:29<09:02,  1.74it/s]

Training_loss 10.51997


 53%|█████▎    | 1059/2000 [09:29<09:21,  1.68it/s]

Training_loss 10.51410


 53%|█████▎    | 1060/2000 [09:30<09:14,  1.70it/s]

Training_loss 10.50693


 53%|█████▎    | 1061/2000 [09:30<09:05,  1.72it/s]

Training_loss 10.50234


 53%|█████▎    | 1062/2000 [09:31<08:51,  1.77it/s]

Training_loss 10.49404


 53%|█████▎    | 1063/2000 [09:32<09:05,  1.72it/s]

Training_loss 10.48518


 53%|█████▎    | 1064/2000 [09:32<09:08,  1.71it/s]

Training_loss 10.47571


 53%|█████▎    | 1065/2000 [09:33<09:06,  1.71it/s]

Training_loss 10.47082


 53%|█████▎    | 1066/2000 [09:33<09:03,  1.72it/s]

Training_loss 10.46259


 53%|█████▎    | 1067/2000 [09:34<09:00,  1.73it/s]

Training_loss 10.45813


 53%|█████▎    | 1068/2000 [09:34<08:44,  1.78it/s]

Training_loss 10.44834


 53%|█████▎    | 1069/2000 [09:35<08:43,  1.78it/s]

Training_loss 10.44332


 54%|█████▎    | 1070/2000 [09:36<08:42,  1.78it/s]

Training_loss 10.43651


 54%|█████▎    | 1071/2000 [09:36<08:52,  1.74it/s]

Training_loss 10.42108


 54%|█████▎    | 1072/2000 [09:37<09:43,  1.59it/s]

Training_loss 10.40979


 54%|█████▎    | 1073/2000 [09:37<09:45,  1.58it/s]

Training_loss 10.40042


 54%|█████▎    | 1074/2000 [09:38<09:53,  1.56it/s]

Training_loss 10.39544


 54%|█████▍    | 1075/2000 [09:39<09:40,  1.59it/s]

Training_loss 10.38934


 54%|█████▍    | 1076/2000 [09:39<09:23,  1.64it/s]

Training_loss 10.38147


 54%|█████▍    | 1077/2000 [09:40<09:25,  1.63it/s]

Training_loss 10.37514


 54%|█████▍    | 1078/2000 [09:41<09:21,  1.64it/s]

Training_loss 10.36817


 54%|█████▍    | 1079/2000 [09:41<09:12,  1.67it/s]

Training_loss 10.36233


 54%|█████▍    | 1080/2000 [09:42<08:32,  1.79it/s]

Training_loss 10.35500


 54%|█████▍    | 1081/2000 [09:42<08:37,  1.78it/s]

Training_loss 10.35118


 54%|█████▍    | 1082/2000 [09:43<08:25,  1.82it/s]

Training_loss 10.34639


 54%|█████▍    | 1083/2000 [09:43<08:05,  1.89it/s]

Training_loss 10.34175


 54%|█████▍    | 1084/2000 [09:44<08:17,  1.84it/s]

Training_loss 10.33827


 54%|█████▍    | 1085/2000 [09:44<08:19,  1.83it/s]

Training_loss 10.33011


 54%|█████▍    | 1086/2000 [09:45<08:17,  1.84it/s]

Training_loss 10.31423


 54%|█████▍    | 1087/2000 [09:45<08:16,  1.84it/s]

Training_loss 10.30821


 54%|█████▍    | 1088/2000 [09:46<08:18,  1.83it/s]

Training_loss 10.29652


 54%|█████▍    | 1089/2000 [09:47<08:37,  1.76it/s]

Training_loss 10.28227


 55%|█████▍    | 1090/2000 [09:47<08:34,  1.77it/s]

Training_loss 10.27242


 55%|█████▍    | 1091/2000 [09:48<08:32,  1.77it/s]

Training_loss 10.27136


 55%|█████▍    | 1092/2000 [09:48<09:01,  1.68it/s]

Training_loss 10.26723


 55%|█████▍    | 1093/2000 [09:49<09:11,  1.64it/s]

Training_loss 10.25701


 55%|█████▍    | 1094/2000 [09:50<09:24,  1.61it/s]

Training_loss 10.24826


 55%|█████▍    | 1095/2000 [09:50<09:36,  1.57it/s]

Training_loss 10.23691


 55%|█████▍    | 1096/2000 [09:51<09:38,  1.56it/s]

Training_loss 10.22852


 55%|█████▍    | 1097/2000 [09:52<09:40,  1.55it/s]

Training_loss 10.22245


 55%|█████▍    | 1098/2000 [09:52<09:40,  1.55it/s]

Training_loss 10.21693


 55%|█████▍    | 1099/2000 [09:53<09:20,  1.61it/s]

Training_loss 10.21038


 55%|█████▌    | 1100/2000 [09:53<09:18,  1.61it/s]

Training_loss 10.19933


 55%|█████▌    | 1101/2000 [09:54<09:24,  1.59it/s]

Training_loss 10.19182


 55%|█████▌    | 1102/2000 [09:55<09:42,  1.54it/s]

Training_loss 10.18385


 55%|█████▌    | 1103/2000 [09:55<09:49,  1.52it/s]

Training_loss 10.17740


 55%|█████▌    | 1104/2000 [09:56<09:33,  1.56it/s]

Training_loss 10.17166


 55%|█████▌    | 1105/2000 [09:57<09:32,  1.56it/s]

Training_loss 10.16365


 55%|█████▌    | 1106/2000 [09:57<09:31,  1.56it/s]

Training_loss 10.15425


 55%|█████▌    | 1107/2000 [09:58<08:58,  1.66it/s]

Training_loss 10.14750


 55%|█████▌    | 1108/2000 [09:58<08:39,  1.72it/s]

Training_loss 10.13593


 55%|█████▌    | 1109/2000 [09:59<08:49,  1.68it/s]

Training_loss 10.12774


 56%|█████▌    | 1110/2000 [09:59<08:06,  1.83it/s]

Training_loss 10.12009


 56%|█████▌    | 1111/2000 [10:00<08:13,  1.80it/s]

Training_loss 10.11367


 56%|█████▌    | 1112/2000 [10:01<08:04,  1.83it/s]

Training_loss 10.10836


 56%|█████▌    | 1113/2000 [10:01<08:18,  1.78it/s]

Training_loss 10.10005


 56%|█████▌    | 1114/2000 [10:02<08:49,  1.67it/s]

Training_loss 10.09166


 56%|█████▌    | 1115/2000 [10:02<08:49,  1.67it/s]

Training_loss 10.08459


 56%|█████▌    | 1116/2000 [10:03<08:44,  1.69it/s]

Training_loss 10.08095


 56%|█████▌    | 1117/2000 [10:04<09:05,  1.62it/s]

Training_loss 10.07286


 56%|█████▌    | 1118/2000 [10:04<09:05,  1.62it/s]

Training_loss 10.06623


 56%|█████▌    | 1119/2000 [10:05<09:17,  1.58it/s]

Training_loss 10.05473


 56%|█████▌    | 1120/2000 [10:06<09:33,  1.53it/s]

Training_loss 10.04593


 56%|█████▌    | 1121/2000 [10:06<09:16,  1.58it/s]

Training_loss 10.03553


 56%|█████▌    | 1122/2000 [10:07<09:16,  1.58it/s]

Training_loss 10.02841


 56%|█████▌    | 1123/2000 [10:07<08:56,  1.64it/s]

Training_loss 10.01541


 56%|█████▌    | 1124/2000 [10:08<08:51,  1.65it/s]

Training_loss 10.01029


 56%|█████▋    | 1125/2000 [10:09<08:45,  1.66it/s]

Training_loss 10.00232


 56%|█████▋    | 1126/2000 [10:09<08:30,  1.71it/s]

Training_loss 9.99734


 56%|█████▋    | 1127/2000 [10:10<08:03,  1.80it/s]

Training_loss 9.99140


 56%|█████▋    | 1128/2000 [10:10<07:58,  1.82it/s]

Training_loss 9.98238


 56%|█████▋    | 1129/2000 [10:11<07:52,  1.85it/s]

Training_loss 9.97732


 56%|█████▋    | 1130/2000 [10:11<07:49,  1.85it/s]

Training_loss 9.96954


 57%|█████▋    | 1131/2000 [10:12<07:59,  1.81it/s]

Training_loss 9.95403


 57%|█████▋    | 1132/2000 [10:12<07:51,  1.84it/s]

Training_loss 9.95011


 57%|█████▋    | 1133/2000 [10:13<07:47,  1.85it/s]

Training_loss 9.94424


 57%|█████▋    | 1134/2000 [10:13<06:55,  2.09it/s]

Training_loss 9.93778


 57%|█████▋    | 1135/2000 [10:14<06:49,  2.11it/s]

Training_loss 9.93335


 57%|█████▋    | 1136/2000 [10:14<06:24,  2.25it/s]

Training_loss 9.92044


 57%|█████▋    | 1137/2000 [10:15<06:40,  2.16it/s]

Training_loss 9.91367


 57%|█████▋    | 1138/2000 [10:15<06:51,  2.09it/s]

Training_loss 9.90959


 57%|█████▋    | 1139/2000 [10:16<07:07,  2.01it/s]

Training_loss 9.89935


 57%|█████▋    | 1140/2000 [10:16<07:20,  1.95it/s]

Training_loss 9.88965


 57%|█████▋    | 1141/2000 [10:17<07:31,  1.90it/s]

Training_loss 9.88503


 57%|█████▋    | 1142/2000 [10:17<07:34,  1.89it/s]

Training_loss 9.87848


 57%|█████▋    | 1143/2000 [10:18<07:38,  1.87it/s]

Training_loss 9.87705


 57%|█████▋    | 1144/2000 [10:18<07:41,  1.85it/s]

Training_loss 9.87305


 57%|█████▋    | 1145/2000 [10:19<07:57,  1.79it/s]

Training_loss 9.85986


 57%|█████▋    | 1146/2000 [10:20<08:09,  1.75it/s]

Training_loss 9.85469


 57%|█████▋    | 1147/2000 [10:20<08:05,  1.76it/s]

Training_loss 9.84649


 57%|█████▋    | 1148/2000 [10:21<08:08,  1.75it/s]

Training_loss 9.84504


 57%|█████▋    | 1149/2000 [10:21<08:31,  1.67it/s]

Training_loss 9.83826


 57%|█████▊    | 1150/2000 [10:22<08:44,  1.62it/s]

Training_loss 9.83691


 58%|█████▊    | 1151/2000 [10:23<08:47,  1.61it/s]

Training_loss 9.82977


 58%|█████▊    | 1152/2000 [10:23<08:25,  1.68it/s]

Training_loss 9.80945


 58%|█████▊    | 1153/2000 [10:24<08:19,  1.70it/s]

Training_loss 9.79462


 58%|█████▊    | 1154/2000 [10:24<08:30,  1.66it/s]

Training_loss 9.78504


 58%|█████▊    | 1155/2000 [10:25<08:28,  1.66it/s]

Training_loss 9.77341


 58%|█████▊    | 1156/2000 [10:26<08:25,  1.67it/s]

Training_loss 9.76456


 58%|█████▊    | 1157/2000 [10:26<08:24,  1.67it/s]

Training_loss 9.75416


 58%|█████▊    | 1158/2000 [10:27<08:13,  1.71it/s]

Training_loss 9.74591


 58%|█████▊    | 1159/2000 [10:27<08:12,  1.71it/s]

Training_loss 9.73828


 58%|█████▊    | 1160/2000 [10:28<08:30,  1.65it/s]

Training_loss 9.73113


 58%|█████▊    | 1161/2000 [10:29<08:28,  1.65it/s]

Training_loss 9.72339


 58%|█████▊    | 1162/2000 [10:29<08:15,  1.69it/s]

Training_loss 9.71533


 58%|█████▊    | 1163/2000 [10:30<07:19,  1.91it/s]

Training_loss 9.70293


 58%|█████▊    | 1164/2000 [10:30<07:24,  1.88it/s]

Training_loss 9.69686


 58%|█████▊    | 1165/2000 [10:31<07:35,  1.83it/s]

Training_loss 9.68935


 58%|█████▊    | 1166/2000 [10:31<07:39,  1.82it/s]

Training_loss 9.68424


 58%|█████▊    | 1167/2000 [10:32<07:45,  1.79it/s]

Training_loss 9.67304


 58%|█████▊    | 1168/2000 [10:32<07:31,  1.84it/s]

Training_loss 9.66802


 58%|█████▊    | 1169/2000 [10:33<07:11,  1.92it/s]

Training_loss 9.66218


 58%|█████▊    | 1170/2000 [10:33<07:15,  1.90it/s]

Training_loss 9.65362


 59%|█████▊    | 1171/2000 [10:34<07:06,  1.94it/s]

Training_loss 9.64456


 59%|█████▊    | 1172/2000 [10:34<07:19,  1.88it/s]

Training_loss 9.63150


 59%|█████▊    | 1173/2000 [10:35<07:33,  1.82it/s]

Training_loss 9.62222


 59%|█████▊    | 1174/2000 [10:36<07:44,  1.78it/s]

Training_loss 9.61443


 59%|█████▉    | 1175/2000 [10:36<07:33,  1.82it/s]

Training_loss 9.60544


 59%|█████▉    | 1176/2000 [10:37<07:44,  1.77it/s]

Training_loss 9.59669


 59%|█████▉    | 1177/2000 [10:37<07:24,  1.85it/s]

Training_loss 9.58375


 59%|█████▉    | 1178/2000 [10:38<07:32,  1.82it/s]

Training_loss 9.57651


 59%|█████▉    | 1179/2000 [10:38<07:48,  1.75it/s]

Training_loss 9.56912


 59%|█████▉    | 1180/2000 [10:39<07:51,  1.74it/s]

Training_loss 9.56380


 59%|█████▉    | 1181/2000 [10:40<08:04,  1.69it/s]

Training_loss 9.56004


 59%|█████▉    | 1182/2000 [10:40<08:13,  1.66it/s]

Training_loss 9.55044


 59%|█████▉    | 1183/2000 [10:41<08:16,  1.65it/s]

Training_loss 9.54185


 59%|█████▉    | 1184/2000 [10:41<08:15,  1.65it/s]

Training_loss 9.53316


 59%|█████▉    | 1185/2000 [10:42<08:10,  1.66it/s]

Training_loss 9.52890


 59%|█████▉    | 1186/2000 [10:43<08:00,  1.69it/s]

Training_loss 9.52491


 59%|█████▉    | 1187/2000 [10:43<07:52,  1.72it/s]

Training_loss 9.52341


 59%|█████▉    | 1188/2000 [10:44<07:55,  1.71it/s]

Training_loss 9.51740


 59%|█████▉    | 1189/2000 [10:44<07:53,  1.71it/s]

Training_loss 9.50958


 60%|█████▉    | 1190/2000 [10:45<07:44,  1.74it/s]

Training_loss 9.50443


 60%|█████▉    | 1191/2000 [10:45<07:31,  1.79it/s]

Training_loss 9.48910


 60%|█████▉    | 1192/2000 [10:46<07:34,  1.78it/s]

Training_loss 9.47401


 60%|█████▉    | 1193/2000 [10:47<07:38,  1.76it/s]

Training_loss 9.46660


 60%|█████▉    | 1194/2000 [10:47<07:34,  1.77it/s]

Training_loss 9.45753


 60%|█████▉    | 1195/2000 [10:48<07:35,  1.77it/s]

Training_loss 9.45331


 60%|█████▉    | 1196/2000 [10:48<07:33,  1.77it/s]

Training_loss 9.44577


 60%|█████▉    | 1197/2000 [10:49<07:38,  1.75it/s]

Training_loss 9.43825


 60%|█████▉    | 1198/2000 [10:49<07:48,  1.71it/s]

Training_loss 9.43398


 60%|█████▉    | 1199/2000 [10:50<07:51,  1.70it/s]

Training_loss 9.42499


 60%|██████    | 1200/2000 [10:51<07:39,  1.74it/s]

Training_loss 9.41629


 60%|██████    | 1201/2000 [10:51<07:53,  1.69it/s]

Training_loss 9.40816


 60%|██████    | 1202/2000 [10:52<08:09,  1.63it/s]

Training_loss 9.40051


 60%|██████    | 1203/2000 [10:52<08:06,  1.64it/s]

Training_loss 9.39427


 60%|██████    | 1204/2000 [10:53<07:54,  1.68it/s]

Training_loss 9.38851


 60%|██████    | 1205/2000 [10:54<08:04,  1.64it/s]

Training_loss 9.38508


 60%|██████    | 1206/2000 [10:54<07:56,  1.67it/s]

Training_loss 9.37408


 60%|██████    | 1207/2000 [10:55<07:43,  1.71it/s]

Training_loss 9.37095


 60%|██████    | 1208/2000 [10:55<07:34,  1.74it/s]

Training_loss 9.36494


 60%|██████    | 1209/2000 [10:56<07:37,  1.73it/s]

Training_loss 9.35258


 60%|██████    | 1210/2000 [10:57<07:40,  1.72it/s]

Training_loss 9.34214


 61%|██████    | 1211/2000 [10:57<07:34,  1.74it/s]

Training_loss 9.33408


 61%|██████    | 1212/2000 [10:58<07:14,  1.81it/s]

Training_loss 9.32213


 61%|██████    | 1213/2000 [10:58<07:08,  1.84it/s]

Training_loss 9.31147


 61%|██████    | 1214/2000 [10:59<07:09,  1.83it/s]

Training_loss 9.30429


 61%|██████    | 1215/2000 [10:59<07:00,  1.87it/s]

Training_loss 9.29091


 61%|██████    | 1216/2000 [11:00<06:53,  1.89it/s]

Training_loss 9.27880


 61%|██████    | 1217/2000 [11:00<07:02,  1.85it/s]

Training_loss 9.27321


 61%|██████    | 1218/2000 [11:01<07:03,  1.85it/s]

Training_loss 9.26451


 61%|██████    | 1219/2000 [11:01<07:01,  1.85it/s]

Training_loss 9.25923


 61%|██████    | 1220/2000 [11:02<06:23,  2.04it/s]

Training_loss 9.24535


 61%|██████    | 1221/2000 [11:02<06:53,  1.88it/s]

Training_loss 9.23504


 61%|██████    | 1222/2000 [11:03<06:27,  2.01it/s]

Training_loss 9.21942


 61%|██████    | 1223/2000 [11:03<06:44,  1.92it/s]

Training_loss 9.21033


 61%|██████    | 1224/2000 [11:04<06:56,  1.86it/s]

Training_loss 9.20214


 61%|██████▏   | 1225/2000 [11:04<06:59,  1.85it/s]

Training_loss 9.18969


 61%|██████▏   | 1226/2000 [11:05<07:02,  1.83it/s]

Training_loss 9.18400


 61%|██████▏   | 1227/2000 [11:06<07:18,  1.76it/s]

Training_loss 9.17779


 61%|██████▏   | 1228/2000 [11:06<07:27,  1.73it/s]

Training_loss 9.16790


 61%|██████▏   | 1229/2000 [11:07<07:18,  1.76it/s]

Training_loss 9.16023


 62%|██████▏   | 1230/2000 [11:07<07:06,  1.81it/s]

Training_loss 9.15141


 62%|██████▏   | 1231/2000 [11:08<06:59,  1.83it/s]

Training_loss 9.14312


 62%|██████▏   | 1232/2000 [11:08<06:52,  1.86it/s]

Training_loss 9.13449


 62%|██████▏   | 1233/2000 [11:09<06:47,  1.88it/s]

Training_loss 9.12355


 62%|██████▏   | 1234/2000 [11:09<06:52,  1.86it/s]

Training_loss 9.11525


 62%|██████▏   | 1235/2000 [11:10<06:40,  1.91it/s]

Training_loss 9.10716


 62%|██████▏   | 1236/2000 [11:10<06:48,  1.87it/s]

Training_loss 9.09690


 62%|██████▏   | 1237/2000 [11:11<06:42,  1.89it/s]

Training_loss 9.08896


 62%|██████▏   | 1238/2000 [11:11<06:31,  1.95it/s]

Training_loss 9.08256


 62%|██████▏   | 1239/2000 [11:12<06:40,  1.90it/s]

Training_loss 9.07536


 62%|██████▏   | 1240/2000 [11:12<06:35,  1.92it/s]

Training_loss 9.06048


 62%|██████▏   | 1241/2000 [11:13<06:38,  1.90it/s]

Training_loss 9.05249


 62%|██████▏   | 1242/2000 [11:14<06:27,  1.96it/s]

Training_loss 9.04334


 62%|██████▏   | 1243/2000 [11:14<06:27,  1.95it/s]

Training_loss 9.03480


 62%|██████▏   | 1244/2000 [11:15<06:32,  1.93it/s]

Training_loss 9.02529


 62%|██████▏   | 1245/2000 [11:15<06:29,  1.94it/s]

Training_loss 9.01989


 62%|██████▏   | 1246/2000 [11:16<06:30,  1.93it/s]

Training_loss 9.01245


 62%|██████▏   | 1247/2000 [11:16<06:37,  1.89it/s]

Training_loss 9.00541


 62%|██████▏   | 1248/2000 [11:17<06:31,  1.92it/s]

Training_loss 8.99865


 62%|██████▏   | 1249/2000 [11:17<06:20,  1.97it/s]

Training_loss 8.98812


 62%|██████▎   | 1250/2000 [11:17<05:41,  2.19it/s]

Training_loss 8.98173


 63%|██████▎   | 1251/2000 [11:18<05:26,  2.30it/s]

Training_loss 8.97440


 63%|██████▎   | 1252/2000 [11:18<05:19,  2.34it/s]

Training_loss 8.96678


 63%|██████▎   | 1253/2000 [11:19<05:31,  2.25it/s]

Training_loss 8.95318


 63%|██████▎   | 1254/2000 [11:19<05:50,  2.13it/s]

Training_loss 8.93869


 63%|██████▎   | 1255/2000 [11:20<05:53,  2.11it/s]

Training_loss 8.93420


 63%|██████▎   | 1256/2000 [11:20<06:10,  2.01it/s]

Training_loss 8.92806


 63%|██████▎   | 1257/2000 [11:21<06:27,  1.92it/s]

Training_loss 8.91913


 63%|██████▎   | 1258/2000 [11:21<06:41,  1.85it/s]

Training_loss 8.91177


 63%|██████▎   | 1259/2000 [11:22<06:41,  1.85it/s]

Training_loss 8.90967


 63%|██████▎   | 1260/2000 [11:23<06:54,  1.79it/s]

Training_loss 8.90161


 63%|██████▎   | 1261/2000 [11:23<06:43,  1.83it/s]

Training_loss 8.89437


 63%|██████▎   | 1262/2000 [11:24<06:42,  1.83it/s]

Training_loss 8.88704


 63%|██████▎   | 1263/2000 [11:24<06:43,  1.83it/s]

Training_loss 8.87591


 63%|██████▎   | 1264/2000 [11:25<06:43,  1.82it/s]

Training_loss 8.86562


 63%|██████▎   | 1265/2000 [11:25<06:37,  1.85it/s]

Training_loss 8.85961


 63%|██████▎   | 1266/2000 [11:26<06:47,  1.80it/s]

Training_loss 8.85493


 63%|██████▎   | 1267/2000 [11:26<06:57,  1.75it/s]

Training_loss 8.84633


 63%|██████▎   | 1268/2000 [11:27<07:02,  1.73it/s]

Training_loss 8.83707


 63%|██████▎   | 1269/2000 [11:28<07:16,  1.67it/s]

Training_loss 8.83280


 64%|██████▎   | 1270/2000 [11:28<07:16,  1.67it/s]

Training_loss 8.82609


 64%|██████▎   | 1271/2000 [11:29<07:40,  1.58it/s]

Training_loss 8.81347


 64%|██████▎   | 1272/2000 [11:30<07:29,  1.62it/s]

Training_loss 8.80531


 64%|██████▎   | 1273/2000 [11:30<07:26,  1.63it/s]

Training_loss 8.79656


 64%|██████▎   | 1274/2000 [11:31<07:28,  1.62it/s]

Training_loss 8.78745


 64%|██████▍   | 1275/2000 [11:32<07:40,  1.58it/s]

Training_loss 8.77943


 64%|██████▍   | 1276/2000 [11:32<07:26,  1.62it/s]

Training_loss 8.77212


 64%|██████▍   | 1277/2000 [11:33<07:22,  1.63it/s]

Training_loss 8.76437


 64%|██████▍   | 1278/2000 [11:33<06:53,  1.74it/s]

Training_loss 8.75839


 64%|██████▍   | 1279/2000 [11:34<06:00,  2.00it/s]

Training_loss 8.74774


 64%|██████▍   | 1280/2000 [11:34<06:22,  1.88it/s]

Training_loss 8.74083


 64%|██████▍   | 1281/2000 [11:35<06:21,  1.89it/s]

Training_loss 8.73000


 64%|██████▍   | 1282/2000 [11:35<06:42,  1.78it/s]

Training_loss 8.72063


 64%|██████▍   | 1283/2000 [11:36<07:02,  1.70it/s]

Training_loss 8.71305


 64%|██████▍   | 1284/2000 [11:37<07:00,  1.70it/s]

Training_loss 8.70726


 64%|██████▍   | 1285/2000 [11:37<06:49,  1.74it/s]

Training_loss 8.70157


 64%|██████▍   | 1286/2000 [11:38<06:53,  1.73it/s]

Training_loss 8.68724


 64%|██████▍   | 1287/2000 [11:38<06:58,  1.70it/s]

Training_loss 8.67939


 64%|██████▍   | 1288/2000 [11:39<06:52,  1.73it/s]

Training_loss 8.66972


 64%|██████▍   | 1289/2000 [11:39<06:47,  1.75it/s]

Training_loss 8.66607


 64%|██████▍   | 1290/2000 [11:40<06:39,  1.78it/s]

Training_loss 8.65723


 65%|██████▍   | 1291/2000 [11:41<06:59,  1.69it/s]

Training_loss 8.65291


 65%|██████▍   | 1292/2000 [11:41<07:32,  1.57it/s]

Training_loss 8.64578


 65%|██████▍   | 1293/2000 [11:42<07:06,  1.66it/s]

Training_loss 8.63670


 65%|██████▍   | 1294/2000 [11:42<06:53,  1.71it/s]

Training_loss 8.62983


 65%|██████▍   | 1295/2000 [11:43<06:37,  1.77it/s]

Training_loss 8.62501


 65%|██████▍   | 1296/2000 [11:43<06:27,  1.82it/s]

Training_loss 8.61732


 65%|██████▍   | 1297/2000 [11:44<06:40,  1.76it/s]

Training_loss 8.60042


 65%|██████▍   | 1298/2000 [11:45<06:40,  1.75it/s]

Training_loss 8.59531


 65%|██████▍   | 1299/2000 [11:45<06:35,  1.77it/s]

Training_loss 8.59274


 65%|██████▌   | 1300/2000 [11:46<06:33,  1.78it/s]

Training_loss 8.58719


 65%|██████▌   | 1301/2000 [11:46<06:24,  1.82it/s]

Training_loss 8.57809


 65%|██████▌   | 1302/2000 [11:47<06:22,  1.82it/s]

Training_loss 8.57040


 65%|██████▌   | 1303/2000 [11:47<06:05,  1.91it/s]

Training_loss 8.56144


 65%|██████▌   | 1304/2000 [11:48<06:07,  1.90it/s]

Training_loss 8.55235


 65%|██████▌   | 1305/2000 [11:48<06:02,  1.92it/s]

Training_loss 8.54272


 65%|██████▌   | 1306/2000 [11:49<06:28,  1.79it/s]

Training_loss 8.53900


 65%|██████▌   | 1307/2000 [11:49<05:56,  1.94it/s]

Training_loss 8.52754


 65%|██████▌   | 1308/2000 [11:50<05:47,  1.99it/s]

Training_loss 8.52424


 65%|██████▌   | 1309/2000 [11:50<05:40,  2.03it/s]

Training_loss 8.51830


 66%|██████▌   | 1310/2000 [11:51<05:34,  2.06it/s]

Training_loss 8.50868


 66%|██████▌   | 1311/2000 [11:51<05:40,  2.02it/s]

Training_loss 8.50243


 66%|██████▌   | 1312/2000 [11:52<06:03,  1.89it/s]

Training_loss 8.49738


 66%|██████▌   | 1313/2000 [11:52<06:11,  1.85it/s]

Training_loss 8.49094


 66%|██████▌   | 1314/2000 [11:53<06:19,  1.81it/s]

Training_loss 8.48191


 66%|██████▌   | 1315/2000 [11:54<06:20,  1.80it/s]

Training_loss 8.46922


 66%|██████▌   | 1316/2000 [11:54<06:18,  1.81it/s]

Training_loss 8.46365


 66%|██████▌   | 1317/2000 [11:55<06:03,  1.88it/s]

Training_loss 8.45525


 66%|██████▌   | 1318/2000 [11:55<06:02,  1.88it/s]

Training_loss 8.44753


 66%|██████▌   | 1319/2000 [11:56<06:01,  1.88it/s]

Training_loss 8.43704


 66%|██████▌   | 1320/2000 [11:56<06:09,  1.84it/s]

Training_loss 8.42820


 66%|██████▌   | 1321/2000 [11:57<06:07,  1.85it/s]

Training_loss 8.41777


 66%|██████▌   | 1322/2000 [11:57<06:05,  1.85it/s]

Training_loss 8.41376


 66%|██████▌   | 1323/2000 [11:58<06:09,  1.83it/s]

Training_loss 8.40467


 66%|██████▌   | 1324/2000 [11:58<06:08,  1.83it/s]

Training_loss 8.39615


 66%|██████▋   | 1325/2000 [11:59<06:08,  1.83it/s]

Training_loss 8.38827


 66%|██████▋   | 1326/2000 [12:00<06:11,  1.82it/s]

Training_loss 8.37785


 66%|██████▋   | 1327/2000 [12:00<05:56,  1.89it/s]

Training_loss 8.37123


 66%|██████▋   | 1328/2000 [12:01<05:50,  1.92it/s]

Training_loss 8.36069


 66%|██████▋   | 1329/2000 [12:01<05:51,  1.91it/s]

Training_loss 8.34842


 66%|██████▋   | 1330/2000 [12:02<05:53,  1.89it/s]

Training_loss 8.33549


 67%|██████▋   | 1331/2000 [12:02<06:07,  1.82it/s]

Training_loss 8.33120


 67%|██████▋   | 1332/2000 [12:03<05:57,  1.87it/s]

Training_loss 8.32403


 67%|██████▋   | 1333/2000 [12:03<06:06,  1.82it/s]

Training_loss 8.31584


 67%|██████▋   | 1334/2000 [12:04<05:59,  1.85it/s]

Training_loss 8.30773


 67%|██████▋   | 1335/2000 [12:04<05:53,  1.88it/s]

Training_loss 8.30316


 67%|██████▋   | 1336/2000 [12:05<05:56,  1.86it/s]

Training_loss 8.29145


 67%|██████▋   | 1337/2000 [12:05<05:20,  2.07it/s]

Training_loss 8.28439


 67%|██████▋   | 1338/2000 [12:06<04:48,  2.29it/s]

Training_loss 8.27975


 67%|██████▋   | 1339/2000 [12:06<04:58,  2.21it/s]

Training_loss 8.27246


 67%|██████▋   | 1340/2000 [12:06<04:33,  2.41it/s]

Training_loss 8.26732


 67%|██████▋   | 1341/2000 [12:07<04:59,  2.20it/s]

Training_loss 8.25964


 67%|██████▋   | 1342/2000 [12:07<05:16,  2.08it/s]

Training_loss 8.25465


 67%|██████▋   | 1343/2000 [12:08<05:32,  1.98it/s]

Training_loss 8.24769


 67%|██████▋   | 1344/2000 [12:09<05:39,  1.93it/s]

Training_loss 8.24295


 67%|██████▋   | 1345/2000 [12:09<05:47,  1.89it/s]

Training_loss 8.23927


 67%|██████▋   | 1346/2000 [12:10<05:50,  1.86it/s]

Training_loss 8.23039


 67%|██████▋   | 1347/2000 [12:10<05:58,  1.82it/s]

Training_loss 8.22120


 67%|██████▋   | 1348/2000 [12:11<06:05,  1.78it/s]

Training_loss 8.21333


 67%|██████▋   | 1349/2000 [12:11<06:04,  1.79it/s]

Training_loss 8.20972


 68%|██████▊   | 1350/2000 [12:12<06:01,  1.80it/s]

Training_loss 8.20402


 68%|██████▊   | 1351/2000 [12:13<06:03,  1.78it/s]

Training_loss 8.19330


 68%|██████▊   | 1352/2000 [12:13<06:00,  1.80it/s]

Training_loss 8.18439


 68%|██████▊   | 1353/2000 [12:14<05:59,  1.80it/s]

Training_loss 8.17392


 68%|██████▊   | 1354/2000 [12:14<05:49,  1.85it/s]

Training_loss 8.16420


 68%|██████▊   | 1355/2000 [12:15<05:40,  1.89it/s]

Training_loss 8.15389


 68%|██████▊   | 1356/2000 [12:15<05:35,  1.92it/s]

Training_loss 8.14689


 68%|██████▊   | 1357/2000 [12:16<05:31,  1.94it/s]

Training_loss 8.13908


 68%|██████▊   | 1358/2000 [12:16<05:30,  1.95it/s]

Training_loss 8.13297


 68%|██████▊   | 1359/2000 [12:17<05:34,  1.92it/s]

Training_loss 8.12175


 68%|██████▊   | 1360/2000 [12:17<05:44,  1.86it/s]

Training_loss 8.11316


 68%|██████▊   | 1361/2000 [12:18<05:35,  1.90it/s]

Training_loss 8.10807


 68%|██████▊   | 1362/2000 [12:18<05:35,  1.90it/s]

Training_loss 8.09995


 68%|██████▊   | 1363/2000 [12:19<05:30,  1.93it/s]

Training_loss 8.08740


 68%|██████▊   | 1364/2000 [12:19<05:38,  1.88it/s]

Training_loss 8.07860


 68%|██████▊   | 1365/2000 [12:20<05:31,  1.92it/s]

Training_loss 8.07211


 68%|██████▊   | 1366/2000 [12:20<05:28,  1.93it/s]

Training_loss 8.06237


 68%|██████▊   | 1367/2000 [12:21<05:32,  1.91it/s]

Training_loss 8.05356


 68%|██████▊   | 1368/2000 [12:21<05:24,  1.95it/s]

Training_loss 8.04598


 68%|██████▊   | 1369/2000 [12:22<04:41,  2.24it/s]

Training_loss 8.03332


 68%|██████▊   | 1370/2000 [12:22<04:45,  2.20it/s]

Training_loss 8.02539


 69%|██████▊   | 1371/2000 [12:23<04:49,  2.17it/s]

Training_loss 8.01632


 69%|██████▊   | 1372/2000 [12:23<04:32,  2.31it/s]

Training_loss 8.00896


 69%|██████▊   | 1373/2000 [12:23<04:46,  2.19it/s]

Training_loss 7.99929


 69%|██████▊   | 1374/2000 [12:24<04:59,  2.09it/s]

Training_loss 7.99129


 69%|██████▉   | 1375/2000 [12:25<05:04,  2.05it/s]

Training_loss 7.98519


 69%|██████▉   | 1376/2000 [12:25<05:10,  2.01it/s]

Training_loss 7.97819


 69%|██████▉   | 1377/2000 [12:26<05:10,  2.00it/s]

Training_loss 7.97120


 69%|██████▉   | 1378/2000 [12:26<05:14,  1.98it/s]

Training_loss 7.95827


 69%|██████▉   | 1379/2000 [12:27<05:08,  2.01it/s]

Training_loss 7.95208


 69%|██████▉   | 1380/2000 [12:27<05:21,  1.93it/s]

Training_loss 7.94501


 69%|██████▉   | 1381/2000 [12:28<05:26,  1.90it/s]

Training_loss 7.93429


 69%|██████▉   | 1382/2000 [12:28<05:22,  1.92it/s]

Training_loss 7.92400


 69%|██████▉   | 1383/2000 [12:29<05:18,  1.94it/s]

Training_loss 7.91722


 69%|██████▉   | 1384/2000 [12:29<05:34,  1.84it/s]

Training_loss 7.90774


 69%|██████▉   | 1385/2000 [12:30<05:28,  1.87it/s]

Training_loss 7.90329


 69%|██████▉   | 1386/2000 [12:30<05:25,  1.88it/s]

Training_loss 7.89351


 69%|██████▉   | 1387/2000 [12:31<05:17,  1.93it/s]

Training_loss 7.88769


 69%|██████▉   | 1388/2000 [12:31<05:12,  1.96it/s]

Training_loss 7.88462


 69%|██████▉   | 1389/2000 [12:32<05:19,  1.91it/s]

Training_loss 7.87937


 70%|██████▉   | 1390/2000 [12:32<05:11,  1.96it/s]

Training_loss 7.87376


 70%|██████▉   | 1391/2000 [12:33<05:09,  1.97it/s]

Training_loss 7.86715


 70%|██████▉   | 1392/2000 [12:33<05:07,  1.98it/s]

Training_loss 7.85622


 70%|██████▉   | 1393/2000 [12:34<05:06,  1.98it/s]

Training_loss 7.84506


 70%|██████▉   | 1394/2000 [12:34<05:06,  1.98it/s]

Training_loss 7.83813


 70%|██████▉   | 1395/2000 [12:35<05:07,  1.97it/s]

Training_loss 7.83278


 70%|██████▉   | 1396/2000 [12:35<05:11,  1.94it/s]

Training_loss 7.82362


 70%|██████▉   | 1397/2000 [12:36<05:03,  1.99it/s]

Training_loss 7.81418


 70%|██████▉   | 1398/2000 [12:36<05:05,  1.97it/s]

Training_loss 7.80850


 70%|██████▉   | 1399/2000 [12:37<05:04,  1.97it/s]

Training_loss 7.79649


 70%|███████   | 1400/2000 [12:37<05:07,  1.95it/s]

Training_loss 7.78581


 70%|███████   | 1401/2000 [12:38<04:37,  2.16it/s]

Training_loss 7.77348


 70%|███████   | 1402/2000 [12:38<04:41,  2.13it/s]

Training_loss 7.76670


 70%|███████   | 1403/2000 [12:39<04:23,  2.26it/s]

Training_loss 7.75993


 70%|███████   | 1404/2000 [12:39<04:06,  2.42it/s]

Training_loss 7.75309


 70%|███████   | 1405/2000 [12:40<04:43,  2.10it/s]

Training_loss 7.74608


 70%|███████   | 1406/2000 [12:40<04:42,  2.10it/s]

Training_loss 7.73578


 70%|███████   | 1407/2000 [12:41<04:44,  2.08it/s]

Training_loss 7.73132


 70%|███████   | 1408/2000 [12:41<04:44,  2.08it/s]

Training_loss 7.72552


 70%|███████   | 1409/2000 [12:42<05:08,  1.92it/s]

Training_loss 7.71971


 70%|███████   | 1410/2000 [12:42<05:23,  1.82it/s]

Training_loss 7.71386


 71%|███████   | 1411/2000 [12:43<05:33,  1.77it/s]

Training_loss 7.70687


 71%|███████   | 1412/2000 [12:43<05:23,  1.82it/s]

Training_loss 7.69945


 71%|███████   | 1413/2000 [12:44<05:30,  1.78it/s]

Training_loss 7.69249


 71%|███████   | 1414/2000 [12:44<05:12,  1.88it/s]

Training_loss 7.68688


 71%|███████   | 1415/2000 [12:45<05:15,  1.85it/s]

Training_loss 7.68094


 71%|███████   | 1416/2000 [12:46<05:12,  1.87it/s]

Training_loss 7.67716


 71%|███████   | 1417/2000 [12:46<05:12,  1.87it/s]

Training_loss 7.66730


 71%|███████   | 1418/2000 [12:47<05:06,  1.90it/s]

Training_loss 7.66104


 71%|███████   | 1419/2000 [12:47<05:12,  1.86it/s]

Training_loss 7.65101


 71%|███████   | 1420/2000 [12:48<05:13,  1.85it/s]

Training_loss 7.64253


 71%|███████   | 1421/2000 [12:48<05:19,  1.81it/s]

Training_loss 7.63689


 71%|███████   | 1422/2000 [12:49<05:11,  1.86it/s]

Training_loss 7.63347


 71%|███████   | 1423/2000 [12:49<05:07,  1.88it/s]

Training_loss 7.62700


 71%|███████   | 1424/2000 [12:50<05:11,  1.85it/s]

Training_loss 7.61759


 71%|███████▏  | 1425/2000 [12:50<05:11,  1.85it/s]

Training_loss 7.61355


 71%|███████▏  | 1426/2000 [12:51<05:15,  1.82it/s]

Training_loss 7.60401


 71%|███████▏  | 1427/2000 [12:52<05:12,  1.84it/s]

Training_loss 7.59845


 71%|███████▏  | 1428/2000 [12:52<05:02,  1.89it/s]

Training_loss 7.59222


 71%|███████▏  | 1429/2000 [12:53<05:00,  1.90it/s]

Training_loss 7.58406


 72%|███████▏  | 1430/2000 [12:53<05:08,  1.85it/s]

Training_loss 7.57443


 72%|███████▏  | 1431/2000 [12:54<04:54,  1.93it/s]

Training_loss 7.56684


 72%|███████▏  | 1432/2000 [12:54<04:54,  1.93it/s]

Training_loss 7.56003


 72%|███████▏  | 1433/2000 [12:55<04:44,  1.99it/s]

Training_loss 7.55395


 72%|███████▏  | 1434/2000 [12:55<04:59,  1.89it/s]

Training_loss 7.54734


 72%|███████▏  | 1435/2000 [12:56<04:37,  2.04it/s]

Training_loss 7.53966


 72%|███████▏  | 1436/2000 [12:56<04:35,  2.04it/s]

Training_loss 7.53398


 72%|███████▏  | 1437/2000 [12:57<04:47,  1.96it/s]

Training_loss 7.52856


 72%|███████▏  | 1438/2000 [12:57<05:11,  1.80it/s]

Training_loss 7.52113


 72%|███████▏  | 1439/2000 [12:58<05:05,  1.84it/s]

Training_loss 7.51170


 72%|███████▏  | 1440/2000 [12:58<04:56,  1.89it/s]

Training_loss 7.50085


 72%|███████▏  | 1441/2000 [12:59<05:06,  1.82it/s]

Training_loss 7.49059


 72%|███████▏  | 1442/2000 [12:59<05:13,  1.78it/s]

Training_loss 7.48331


 72%|███████▏  | 1443/2000 [13:00<04:54,  1.89it/s]

Training_loss 7.47684


 72%|███████▏  | 1444/2000 [13:00<04:55,  1.88it/s]

Training_loss 7.47268


 72%|███████▏  | 1445/2000 [13:01<04:49,  1.92it/s]

Training_loss 7.46848


 72%|███████▏  | 1446/2000 [13:01<04:49,  1.91it/s]

Training_loss 7.46409


 72%|███████▏  | 1447/2000 [13:02<04:55,  1.87it/s]

Training_loss 7.45718


 72%|███████▏  | 1448/2000 [13:03<04:55,  1.87it/s]

Training_loss 7.44840


 72%|███████▏  | 1449/2000 [13:03<04:48,  1.91it/s]

Training_loss 7.43997


 72%|███████▎  | 1450/2000 [13:04<04:45,  1.93it/s]

Training_loss 7.43276


 73%|███████▎  | 1451/2000 [13:04<04:49,  1.90it/s]

Training_loss 7.42665


 73%|███████▎  | 1452/2000 [13:05<04:50,  1.88it/s]

Training_loss 7.42239


 73%|███████▎  | 1453/2000 [13:05<04:46,  1.91it/s]

Training_loss 7.41409


 73%|███████▎  | 1454/2000 [13:06<04:48,  1.89it/s]

Training_loss 7.40713


 73%|███████▎  | 1455/2000 [13:06<04:43,  1.92it/s]

Training_loss 7.39951


 73%|███████▎  | 1456/2000 [13:07<04:35,  1.97it/s]

Training_loss 7.39454


 73%|███████▎  | 1457/2000 [13:07<04:44,  1.91it/s]

Training_loss 7.38613


 73%|███████▎  | 1458/2000 [13:08<04:44,  1.91it/s]

Training_loss 7.38273


 73%|███████▎  | 1459/2000 [13:08<04:41,  1.92it/s]

Training_loss 7.37463


 73%|███████▎  | 1460/2000 [13:09<04:41,  1.92it/s]

Training_loss 7.36918


 73%|███████▎  | 1461/2000 [13:09<04:28,  2.01it/s]

Training_loss 7.36221


 73%|███████▎  | 1462/2000 [13:10<04:02,  2.22it/s]

Training_loss 7.35572


 73%|███████▎  | 1463/2000 [13:10<04:01,  2.23it/s]

Training_loss 7.34629


 73%|███████▎  | 1464/2000 [13:11<04:10,  2.14it/s]

Training_loss 7.33830


 73%|███████▎  | 1465/2000 [13:11<03:57,  2.25it/s]

Training_loss 7.33514


 73%|███████▎  | 1466/2000 [13:11<04:19,  2.06it/s]

Training_loss 7.32511


 73%|███████▎  | 1467/2000 [13:12<04:29,  1.98it/s]

Training_loss 7.31540


 73%|███████▎  | 1468/2000 [13:13<04:37,  1.92it/s]

Training_loss 7.31268


 73%|███████▎  | 1469/2000 [13:13<04:44,  1.87it/s]

Training_loss 7.30866


 74%|███████▎  | 1470/2000 [13:14<04:50,  1.83it/s]

Training_loss 7.30241


 74%|███████▎  | 1471/2000 [13:14<04:39,  1.89it/s]

Training_loss 7.29083


 74%|███████▎  | 1472/2000 [13:15<04:30,  1.95it/s]

Training_loss 7.28555


 74%|███████▎  | 1473/2000 [13:15<04:33,  1.93it/s]

Training_loss 7.28207


 74%|███████▎  | 1474/2000 [13:16<04:31,  1.94it/s]

Training_loss 7.27462


 74%|███████▍  | 1475/2000 [13:16<04:25,  1.98it/s]

Training_loss 7.26819


 74%|███████▍  | 1476/2000 [13:17<04:28,  1.95it/s]

Training_loss 7.26266


 74%|███████▍  | 1477/2000 [13:17<04:23,  1.98it/s]

Training_loss 7.25438


 74%|███████▍  | 1478/2000 [13:18<04:28,  1.94it/s]

Training_loss 7.24747


 74%|███████▍  | 1479/2000 [13:18<04:24,  1.97it/s]

Training_loss 7.23782


 74%|███████▍  | 1480/2000 [13:19<04:23,  1.98it/s]

Training_loss 7.23204


 74%|███████▍  | 1481/2000 [13:19<04:22,  1.98it/s]

Training_loss 7.22631


 74%|███████▍  | 1482/2000 [13:20<04:22,  1.98it/s]

Training_loss 7.21997


 74%|███████▍  | 1483/2000 [13:20<04:17,  2.00it/s]

Training_loss 7.21595


 74%|███████▍  | 1484/2000 [13:21<04:24,  1.95it/s]

Training_loss 7.21040


 74%|███████▍  | 1485/2000 [13:21<04:22,  1.96it/s]

Training_loss 7.20693


 74%|███████▍  | 1486/2000 [13:22<04:19,  1.98it/s]

Training_loss 7.20300


 74%|███████▍  | 1487/2000 [13:22<04:27,  1.92it/s]

Training_loss 7.19722


 74%|███████▍  | 1488/2000 [13:23<04:27,  1.92it/s]

Training_loss 7.19171


 74%|███████▍  | 1489/2000 [13:23<04:31,  1.88it/s]

Training_loss 7.18425


 74%|███████▍  | 1490/2000 [13:24<04:28,  1.90it/s]

Training_loss 7.17683


 75%|███████▍  | 1491/2000 [13:24<04:27,  1.90it/s]

Training_loss 7.17172


 75%|███████▍  | 1492/2000 [13:25<04:32,  1.86it/s]

Training_loss 7.16689


 75%|███████▍  | 1493/2000 [13:25<04:00,  2.11it/s]

Training_loss 7.16260


 75%|███████▍  | 1494/2000 [13:26<03:47,  2.23it/s]

Training_loss 7.15314


 75%|███████▍  | 1495/2000 [13:26<03:43,  2.26it/s]

Training_loss 7.14684


 75%|███████▍  | 1496/2000 [13:27<03:38,  2.31it/s]

Training_loss 7.13604


 75%|███████▍  | 1497/2000 [13:27<03:49,  2.19it/s]

Training_loss 7.12914


 75%|███████▍  | 1498/2000 [13:28<04:04,  2.06it/s]

Training_loss 7.12452


 75%|███████▍  | 1499/2000 [13:28<04:09,  2.01it/s]

Training_loss 7.11456


 75%|███████▌  | 1500/2000 [13:29<04:07,  2.02it/s]

Training_loss 7.10439


 75%|███████▌  | 1501/2000 [13:29<04:14,  1.96it/s]

Training_loss 7.10294


 75%|███████▌  | 1502/2000 [13:30<04:01,  2.06it/s]

Training_loss 7.09786


 75%|███████▌  | 1503/2000 [13:30<04:06,  2.02it/s]

Training_loss 7.09050


 75%|███████▌  | 1504/2000 [13:31<04:09,  1.99it/s]

Training_loss 7.08282


 75%|███████▌  | 1505/2000 [13:31<04:04,  2.02it/s]

Training_loss 7.07670


 75%|███████▌  | 1506/2000 [13:32<04:07,  2.00it/s]

Training_loss 7.07159


 75%|███████▌  | 1507/2000 [13:32<04:04,  2.02it/s]

Training_loss 7.06594


 75%|███████▌  | 1508/2000 [13:33<04:20,  1.89it/s]

Training_loss 7.06095


 75%|███████▌  | 1509/2000 [13:33<04:08,  1.98it/s]

Training_loss 7.05206


 76%|███████▌  | 1510/2000 [13:34<03:55,  2.08it/s]

Training_loss 7.04568


 76%|███████▌  | 1511/2000 [13:34<04:06,  1.99it/s]

Training_loss 7.04134


 76%|███████▌  | 1512/2000 [13:35<04:21,  1.87it/s]

Training_loss 7.03607


 76%|███████▌  | 1513/2000 [13:35<04:20,  1.87it/s]

Training_loss 7.03044


 76%|███████▌  | 1514/2000 [13:36<04:19,  1.88it/s]

Training_loss 7.02474


 76%|███████▌  | 1515/2000 [13:36<04:11,  1.93it/s]

Training_loss 7.01952


 76%|███████▌  | 1516/2000 [13:37<04:19,  1.86it/s]

Training_loss 7.00985


 76%|███████▌  | 1517/2000 [13:37<04:16,  1.88it/s]

Training_loss 7.00755


 76%|███████▌  | 1518/2000 [13:38<04:23,  1.83it/s]

Training_loss 6.99941


 76%|███████▌  | 1519/2000 [13:39<04:23,  1.82it/s]

Training_loss 6.99287


 76%|███████▌  | 1520/2000 [13:39<04:24,  1.81it/s]

Training_loss 6.98899


 76%|███████▌  | 1521/2000 [13:40<04:28,  1.78it/s]

Training_loss 6.98464


 76%|███████▌  | 1522/2000 [13:40<04:26,  1.79it/s]

Training_loss 6.98057


 76%|███████▌  | 1523/2000 [13:41<04:19,  1.84it/s]

Training_loss 6.97450


 76%|███████▌  | 1524/2000 [13:41<04:03,  1.95it/s]

Training_loss 6.96442


 76%|███████▋  | 1525/2000 [13:42<03:37,  2.18it/s]

Training_loss 6.95856


 76%|███████▋  | 1526/2000 [13:42<03:47,  2.09it/s]

Training_loss 6.95436


 76%|███████▋  | 1527/2000 [13:43<03:58,  1.99it/s]

Training_loss 6.94818


 76%|███████▋  | 1528/2000 [13:43<03:49,  2.06it/s]

Training_loss 6.94343


 76%|███████▋  | 1529/2000 [13:44<04:04,  1.92it/s]

Training_loss 6.93715


 76%|███████▋  | 1530/2000 [13:44<04:05,  1.92it/s]

Training_loss 6.92925


 77%|███████▋  | 1531/2000 [13:45<04:15,  1.83it/s]

Training_loss 6.92384


 77%|███████▋  | 1532/2000 [13:45<04:12,  1.85it/s]

Training_loss 6.91902


 77%|███████▋  | 1533/2000 [13:46<04:18,  1.81it/s]

Training_loss 6.91277


 77%|███████▋  | 1534/2000 [13:47<04:28,  1.74it/s]

Training_loss 6.90664


 77%|███████▋  | 1535/2000 [13:47<04:19,  1.79it/s]

Training_loss 6.90145


 77%|███████▋  | 1536/2000 [13:48<04:17,  1.80it/s]

Training_loss 6.89364


 77%|███████▋  | 1537/2000 [13:48<04:17,  1.80it/s]

Training_loss 6.89001


 77%|███████▋  | 1538/2000 [13:49<04:16,  1.80it/s]

Training_loss 6.88554


 77%|███████▋  | 1539/2000 [13:49<04:11,  1.83it/s]

Training_loss 6.87898


 77%|███████▋  | 1540/2000 [13:50<04:17,  1.79it/s]

Training_loss 6.87291


 77%|███████▋  | 1541/2000 [13:50<04:23,  1.74it/s]

Training_loss 6.86810


 77%|███████▋  | 1542/2000 [13:51<04:22,  1.74it/s]

Training_loss 6.86383


 77%|███████▋  | 1543/2000 [13:52<04:21,  1.75it/s]

Training_loss 6.85682


 77%|███████▋  | 1544/2000 [13:52<04:23,  1.73it/s]

Training_loss 6.84849


 77%|███████▋  | 1545/2000 [13:53<04:26,  1.70it/s]

Training_loss 6.84479


 77%|███████▋  | 1546/2000 [13:53<04:21,  1.73it/s]

Training_loss 6.84034


 77%|███████▋  | 1547/2000 [13:54<04:12,  1.79it/s]

Training_loss 6.83278


 77%|███████▋  | 1548/2000 [13:54<04:08,  1.82it/s]

Training_loss 6.82893


 77%|███████▋  | 1549/2000 [13:55<04:00,  1.88it/s]

Training_loss 6.82144


 78%|███████▊  | 1550/2000 [13:55<04:01,  1.86it/s]

Training_loss 6.81207


 78%|███████▊  | 1551/2000 [13:56<03:56,  1.90it/s]

Training_loss 6.80502


 78%|███████▊  | 1552/2000 [13:56<03:49,  1.95it/s]

Training_loss 6.80104


 78%|███████▊  | 1553/2000 [13:57<03:49,  1.94it/s]

Training_loss 6.79688


 78%|███████▊  | 1554/2000 [13:57<03:43,  1.99it/s]

Training_loss 6.79031


 78%|███████▊  | 1555/2000 [13:58<03:50,  1.93it/s]

Training_loss 6.78233


 78%|███████▊  | 1556/2000 [13:58<03:29,  2.12it/s]

Training_loss 6.77777


 78%|███████▊  | 1557/2000 [13:59<03:11,  2.32it/s]

Training_loss 6.77412


 78%|███████▊  | 1558/2000 [13:59<03:16,  2.25it/s]

Training_loss 6.77015


 78%|███████▊  | 1559/2000 [14:00<03:26,  2.14it/s]

Training_loss 6.76741


 78%|███████▊  | 1560/2000 [14:00<03:31,  2.08it/s]

Training_loss 6.76244


 78%|███████▊  | 1561/2000 [14:01<03:39,  2.00it/s]

Training_loss 6.75927


 78%|███████▊  | 1562/2000 [14:01<03:43,  1.96it/s]

Training_loss 6.75560


 78%|███████▊  | 1563/2000 [14:02<03:43,  1.95it/s]

Training_loss 6.75343


 78%|███████▊  | 1564/2000 [14:02<03:44,  1.94it/s]

Training_loss 6.74911


 78%|███████▊  | 1565/2000 [14:03<03:44,  1.94it/s]

Training_loss 6.74395


 78%|███████▊  | 1566/2000 [14:03<03:54,  1.85it/s]

Training_loss 6.73710


 78%|███████▊  | 1567/2000 [14:04<03:57,  1.82it/s]

Training_loss 6.73119


 78%|███████▊  | 1568/2000 [14:05<04:03,  1.77it/s]

Training_loss 6.72386


 78%|███████▊  | 1569/2000 [14:05<04:00,  1.79it/s]

Training_loss 6.72045


 78%|███████▊  | 1570/2000 [14:06<03:55,  1.83it/s]

Training_loss 6.71623


 79%|███████▊  | 1571/2000 [14:06<03:50,  1.86it/s]

Training_loss 6.71357


 79%|███████▊  | 1572/2000 [14:07<03:59,  1.79it/s]

Training_loss 6.70957


 79%|███████▊  | 1573/2000 [14:07<03:52,  1.83it/s]

Training_loss 6.70504


 79%|███████▊  | 1574/2000 [14:08<03:54,  1.82it/s]

Training_loss 6.70180


 79%|███████▉  | 1575/2000 [14:08<03:45,  1.89it/s]

Training_loss 6.69413


 79%|███████▉  | 1576/2000 [14:09<03:48,  1.86it/s]

Training_loss 6.68945


 79%|███████▉  | 1577/2000 [14:09<03:45,  1.87it/s]

Training_loss 6.68616


 79%|███████▉  | 1578/2000 [14:10<03:54,  1.80it/s]

Training_loss 6.68280


 79%|███████▉  | 1579/2000 [14:11<03:51,  1.82it/s]

Training_loss 6.67871


 79%|███████▉  | 1580/2000 [14:11<03:48,  1.84it/s]

Training_loss 6.67574


 79%|███████▉  | 1581/2000 [14:12<03:44,  1.87it/s]

Training_loss 6.66930


 79%|███████▉  | 1582/2000 [14:12<03:40,  1.90it/s]

Training_loss 6.66450


 79%|███████▉  | 1583/2000 [14:13<03:29,  1.99it/s]

Training_loss 6.65955


 79%|███████▉  | 1584/2000 [14:13<03:23,  2.04it/s]

Training_loss 6.65562


 79%|███████▉  | 1585/2000 [14:13<03:02,  2.27it/s]

Training_loss 6.65241


 79%|███████▉  | 1586/2000 [14:14<03:14,  2.13it/s]

Training_loss 6.64818


 79%|███████▉  | 1587/2000 [14:14<03:12,  2.15it/s]

Training_loss 6.64346


 79%|███████▉  | 1588/2000 [14:15<02:56,  2.34it/s]

Training_loss 6.63999


 79%|███████▉  | 1589/2000 [14:15<02:58,  2.30it/s]

Training_loss 6.63517


 80%|███████▉  | 1590/2000 [14:16<03:07,  2.19it/s]

Training_loss 6.63271


 80%|███████▉  | 1591/2000 [14:16<03:16,  2.09it/s]

Training_loss 6.62465


 80%|███████▉  | 1592/2000 [14:17<03:21,  2.03it/s]

Training_loss 6.62052


 80%|███████▉  | 1593/2000 [14:17<03:18,  2.05it/s]

Training_loss 6.61788


 80%|███████▉  | 1594/2000 [14:18<03:28,  1.95it/s]

Training_loss 6.61502


 80%|███████▉  | 1595/2000 [14:18<03:33,  1.90it/s]

Training_loss 6.60801


 80%|███████▉  | 1596/2000 [14:19<03:30,  1.92it/s]

Training_loss 6.60301


 80%|███████▉  | 1597/2000 [14:19<03:32,  1.89it/s]

Training_loss 6.59982


 80%|███████▉  | 1598/2000 [14:20<03:34,  1.88it/s]

Training_loss 6.59315


 80%|███████▉  | 1599/2000 [14:20<03:33,  1.88it/s]

Training_loss 6.58738


 80%|████████  | 1600/2000 [14:21<03:36,  1.85it/s]

Training_loss 6.58315


 80%|████████  | 1601/2000 [14:22<03:36,  1.85it/s]

Training_loss 6.57948


 80%|████████  | 1602/2000 [14:22<03:40,  1.81it/s]

Training_loss 6.57460


 80%|████████  | 1603/2000 [14:23<03:39,  1.81it/s]

Training_loss 6.56829


 80%|████████  | 1604/2000 [14:23<03:39,  1.81it/s]

Training_loss 6.56259


 80%|████████  | 1605/2000 [14:24<03:34,  1.85it/s]

Training_loss 6.56019


 80%|████████  | 1606/2000 [14:24<03:32,  1.85it/s]

Training_loss 6.55747


 80%|████████  | 1607/2000 [14:25<03:38,  1.80it/s]

Training_loss 6.55177


 80%|████████  | 1608/2000 [14:25<03:46,  1.73it/s]

Training_loss 6.54692


 80%|████████  | 1609/2000 [14:26<03:52,  1.68it/s]

Training_loss 6.54201


 80%|████████  | 1610/2000 [14:27<03:48,  1.71it/s]

Training_loss 6.53646


 81%|████████  | 1611/2000 [14:27<03:50,  1.69it/s]

Training_loss 6.53223


 81%|████████  | 1612/2000 [14:28<03:44,  1.73it/s]

Training_loss 6.52548


 81%|████████  | 1613/2000 [14:28<03:45,  1.72it/s]

Training_loss 6.52151


 81%|████████  | 1614/2000 [14:29<03:26,  1.87it/s]

Training_loss 6.51805


 81%|████████  | 1615/2000 [14:29<03:20,  1.92it/s]

Training_loss 6.51187


 81%|████████  | 1616/2000 [14:30<03:12,  1.99it/s]

Training_loss 6.50388


 81%|████████  | 1617/2000 [14:30<03:03,  2.08it/s]

Training_loss 6.49481


 81%|████████  | 1618/2000 [14:31<02:48,  2.27it/s]

Training_loss 6.48827


 81%|████████  | 1619/2000 [14:31<03:04,  2.07it/s]

Training_loss 6.47909


 81%|████████  | 1620/2000 [14:32<03:11,  1.99it/s]

Training_loss 6.47497


 81%|████████  | 1621/2000 [14:32<03:12,  1.97it/s]

Training_loss 6.47214


 81%|████████  | 1622/2000 [14:33<03:18,  1.90it/s]

Training_loss 6.46586


 81%|████████  | 1623/2000 [14:33<03:17,  1.91it/s]

Training_loss 6.46328


 81%|████████  | 1624/2000 [14:34<03:24,  1.84it/s]

Training_loss 6.45900


 81%|████████▏ | 1625/2000 [14:34<03:27,  1.81it/s]

Training_loss 6.45534


 81%|████████▏ | 1626/2000 [14:35<03:21,  1.86it/s]

Training_loss 6.45204


 81%|████████▏ | 1627/2000 [14:36<03:18,  1.88it/s]

Training_loss 6.44680


 81%|████████▏ | 1628/2000 [14:36<03:16,  1.89it/s]

Training_loss 6.44399


 81%|████████▏ | 1629/2000 [14:37<03:15,  1.90it/s]

Training_loss 6.44203


 82%|████████▏ | 1630/2000 [14:37<03:14,  1.90it/s]

Training_loss 6.43765


 82%|████████▏ | 1631/2000 [14:38<03:17,  1.87it/s]

Training_loss 6.43343


 82%|████████▏ | 1632/2000 [14:38<03:17,  1.86it/s]

Training_loss 6.42788


 82%|████████▏ | 1633/2000 [14:39<03:19,  1.84it/s]

Training_loss 6.42024


 82%|████████▏ | 1634/2000 [14:39<03:22,  1.81it/s]

Training_loss 6.41361


 82%|████████▏ | 1635/2000 [14:40<03:19,  1.83it/s]

Training_loss 6.41141


 82%|████████▏ | 1636/2000 [14:40<03:16,  1.85it/s]

Training_loss 6.40828


 82%|████████▏ | 1637/2000 [14:41<03:15,  1.86it/s]

Training_loss 6.40320


 82%|████████▏ | 1638/2000 [14:41<03:14,  1.86it/s]

Training_loss 6.40092


 82%|████████▏ | 1639/2000 [14:42<03:10,  1.90it/s]

Training_loss 6.39876


 82%|████████▏ | 1640/2000 [14:43<03:15,  1.84it/s]

Training_loss 6.39296


 82%|████████▏ | 1641/2000 [14:43<03:10,  1.89it/s]

Training_loss 6.38750


 82%|████████▏ | 1642/2000 [14:44<03:14,  1.84it/s]

Training_loss 6.38658


 82%|████████▏ | 1643/2000 [14:44<03:19,  1.79it/s]

Training_loss 6.38408


 82%|████████▏ | 1644/2000 [14:45<03:20,  1.78it/s]

Training_loss 6.38131


 82%|████████▏ | 1645/2000 [14:45<03:16,  1.81it/s]

Training_loss 6.37578


 82%|████████▏ | 1646/2000 [14:46<02:47,  2.11it/s]

Training_loss 6.37016


 82%|████████▏ | 1647/2000 [14:46<02:40,  2.20it/s]

Training_loss 6.36481


 82%|████████▏ | 1648/2000 [14:46<02:43,  2.15it/s]

Training_loss 6.36087


 82%|████████▏ | 1649/2000 [14:47<02:38,  2.22it/s]

Training_loss 6.35826


 82%|████████▎ | 1650/2000 [14:47<02:47,  2.09it/s]

Training_loss 6.35470


 83%|████████▎ | 1651/2000 [14:48<02:52,  2.02it/s]

Training_loss 6.35386


 83%|████████▎ | 1652/2000 [14:48<02:51,  2.03it/s]

Training_loss 6.34575


 83%|████████▎ | 1653/2000 [14:49<02:59,  1.93it/s]

Training_loss 6.34092


 83%|████████▎ | 1654/2000 [14:50<03:00,  1.91it/s]

Training_loss 6.33862


 83%|████████▎ | 1655/2000 [14:50<02:57,  1.95it/s]

Training_loss 6.33600


 83%|████████▎ | 1656/2000 [14:51<02:54,  1.97it/s]

Training_loss 6.33165


 83%|████████▎ | 1657/2000 [14:51<03:00,  1.90it/s]

Training_loss 6.32630


 83%|████████▎ | 1658/2000 [14:52<03:01,  1.88it/s]

Training_loss 6.32094


 83%|████████▎ | 1659/2000 [14:52<02:59,  1.90it/s]

Training_loss 6.31438


 83%|████████▎ | 1660/2000 [14:53<03:00,  1.88it/s]

Training_loss 6.31160


 83%|████████▎ | 1661/2000 [14:53<03:00,  1.88it/s]

Training_loss 6.31023


 83%|████████▎ | 1662/2000 [14:54<02:57,  1.90it/s]

Training_loss 6.30895


 83%|████████▎ | 1663/2000 [14:54<03:02,  1.85it/s]

Training_loss 6.30530


 83%|████████▎ | 1664/2000 [14:55<03:02,  1.85it/s]

Training_loss 6.29902


 83%|████████▎ | 1665/2000 [14:55<03:04,  1.81it/s]

Training_loss 6.29296


 83%|████████▎ | 1666/2000 [14:56<02:59,  1.86it/s]

Training_loss 6.29123


 83%|████████▎ | 1667/2000 [14:57<03:06,  1.79it/s]

Training_loss 6.28654


 83%|████████▎ | 1668/2000 [14:57<03:05,  1.79it/s]

Training_loss 6.28312


 83%|████████▎ | 1669/2000 [14:58<03:04,  1.80it/s]

Training_loss 6.27704


 84%|████████▎ | 1670/2000 [14:58<03:04,  1.79it/s]

Training_loss 6.27214


 84%|████████▎ | 1671/2000 [14:59<02:53,  1.90it/s]

Training_loss 6.26475


 84%|████████▎ | 1672/2000 [14:59<02:50,  1.92it/s]

Training_loss 6.26134


 84%|████████▎ | 1673/2000 [15:00<02:50,  1.92it/s]

Training_loss 6.25701


 84%|████████▎ | 1674/2000 [15:00<02:48,  1.94it/s]

Training_loss 6.25525


 84%|████████▍ | 1675/2000 [15:01<02:50,  1.91it/s]

Training_loss 6.25009


 84%|████████▍ | 1676/2000 [15:01<02:49,  1.92it/s]

Training_loss 6.24819


 84%|████████▍ | 1677/2000 [15:02<02:49,  1.91it/s]

Training_loss 6.24163


 84%|████████▍ | 1678/2000 [15:02<02:27,  2.18it/s]

Training_loss 6.23769


 84%|████████▍ | 1679/2000 [15:03<02:23,  2.24it/s]

Training_loss 6.23263


 84%|████████▍ | 1680/2000 [15:03<02:25,  2.20it/s]

Training_loss 6.23025


 84%|████████▍ | 1681/2000 [15:04<02:29,  2.14it/s]

Training_loss 6.22578


 84%|████████▍ | 1682/2000 [15:04<02:14,  2.36it/s]

Training_loss 6.22450


 84%|████████▍ | 1683/2000 [15:04<02:16,  2.32it/s]

Training_loss 6.21914


 84%|████████▍ | 1684/2000 [15:05<02:22,  2.21it/s]

Training_loss 6.21634


 84%|████████▍ | 1685/2000 [15:05<02:28,  2.12it/s]

Training_loss 6.21268


 84%|████████▍ | 1686/2000 [15:06<02:28,  2.12it/s]

Training_loss 6.20903


 84%|████████▍ | 1687/2000 [15:06<02:33,  2.03it/s]

Training_loss 6.20713


 84%|████████▍ | 1688/2000 [15:07<02:31,  2.06it/s]

Training_loss 6.20388


 84%|████████▍ | 1689/2000 [15:07<02:34,  2.02it/s]

Training_loss 6.19941


 84%|████████▍ | 1690/2000 [15:08<02:35,  1.99it/s]

Training_loss 6.19338


 85%|████████▍ | 1691/2000 [15:08<02:31,  2.03it/s]

Training_loss 6.18689


 85%|████████▍ | 1692/2000 [15:09<02:32,  2.02it/s]

Training_loss 6.18421


 85%|████████▍ | 1693/2000 [15:09<02:34,  1.99it/s]

Training_loss 6.18668


 85%|████████▍ | 1694/2000 [15:10<02:39,  1.92it/s]

Training_loss 6.18334


 85%|████████▍ | 1695/2000 [15:10<02:43,  1.87it/s]

Training_loss 6.17787


 85%|████████▍ | 1696/2000 [15:11<02:37,  1.93it/s]

Training_loss 6.17540


 85%|████████▍ | 1697/2000 [15:12<02:42,  1.86it/s]

Training_loss 6.16981


 85%|████████▍ | 1698/2000 [15:12<02:43,  1.85it/s]

Training_loss 6.16283


 85%|████████▍ | 1699/2000 [15:13<02:39,  1.89it/s]

Training_loss 6.15864


 85%|████████▌ | 1700/2000 [15:13<02:35,  1.93it/s]

Training_loss 6.15525


 85%|████████▌ | 1701/2000 [15:14<02:35,  1.92it/s]

Training_loss 6.15143


 85%|████████▌ | 1702/2000 [15:14<02:34,  1.93it/s]

Training_loss 6.14852


 85%|████████▌ | 1703/2000 [15:15<02:28,  2.00it/s]

Training_loss 6.14312


 85%|████████▌ | 1704/2000 [15:15<02:27,  2.00it/s]

Training_loss 6.14194


 85%|████████▌ | 1705/2000 [15:16<02:29,  1.97it/s]

Training_loss 6.13878


 85%|████████▌ | 1706/2000 [15:16<02:29,  1.97it/s]

Training_loss 6.13786


 85%|████████▌ | 1707/2000 [15:17<02:29,  1.96it/s]

Training_loss 6.13402


 85%|████████▌ | 1708/2000 [15:17<02:26,  1.99it/s]

Training_loss 6.13127


 85%|████████▌ | 1709/2000 [15:17<02:15,  2.15it/s]

Training_loss 6.12815


 86%|████████▌ | 1710/2000 [15:18<02:18,  2.09it/s]

Training_loss 6.12408


 86%|████████▌ | 1711/2000 [15:18<02:09,  2.23it/s]

Training_loss 6.12156


 86%|████████▌ | 1712/2000 [15:19<01:59,  2.40it/s]

Training_loss 6.12076


 86%|████████▌ | 1713/2000 [15:19<02:10,  2.19it/s]

Training_loss 6.11960


 86%|████████▌ | 1714/2000 [15:20<02:18,  2.06it/s]

Training_loss 6.11450


 86%|████████▌ | 1715/2000 [15:20<02:19,  2.05it/s]

Training_loss 6.10997


 86%|████████▌ | 1716/2000 [15:21<02:19,  2.03it/s]

Training_loss 6.10817


 86%|████████▌ | 1717/2000 [15:21<02:24,  1.96it/s]

Training_loss 6.10897


 86%|████████▌ | 1718/2000 [15:22<02:25,  1.94it/s]

Training_loss 6.10475


 86%|████████▌ | 1719/2000 [15:22<02:26,  1.92it/s]

Training_loss 6.10120


 86%|████████▌ | 1720/2000 [15:23<02:23,  1.95it/s]

Training_loss 6.09658


 86%|████████▌ | 1721/2000 [15:23<02:24,  1.94it/s]

Training_loss 6.09183


 86%|████████▌ | 1722/2000 [15:24<02:23,  1.94it/s]

Training_loss 6.08962


 86%|████████▌ | 1723/2000 [15:24<02:25,  1.90it/s]

Training_loss 6.08696


 86%|████████▌ | 1724/2000 [15:25<02:12,  2.08it/s]

Training_loss 6.08184


 86%|████████▋ | 1725/2000 [15:25<02:01,  2.26it/s]

Training_loss 6.07463


 86%|████████▋ | 1726/2000 [15:26<02:08,  2.14it/s]

Training_loss 6.07070


 86%|████████▋ | 1727/2000 [15:26<02:16,  1.99it/s]

Training_loss 6.06875


 86%|████████▋ | 1728/2000 [15:27<02:18,  1.97it/s]

Training_loss 6.06677


 86%|████████▋ | 1729/2000 [15:27<02:15,  2.00it/s]

Training_loss 6.06248


 86%|████████▋ | 1730/2000 [15:28<02:18,  1.95it/s]

Training_loss 6.06098


 87%|████████▋ | 1731/2000 [15:28<02:19,  1.93it/s]

Training_loss 6.05923


 87%|████████▋ | 1732/2000 [15:29<02:22,  1.88it/s]

Training_loss 6.05605


 87%|████████▋ | 1733/2000 [15:30<02:28,  1.80it/s]

Training_loss 6.05230


 87%|████████▋ | 1734/2000 [15:30<02:26,  1.82it/s]

Training_loss 6.04911


 87%|████████▋ | 1735/2000 [15:31<02:18,  1.91it/s]

Training_loss 6.04269


 87%|████████▋ | 1736/2000 [15:31<02:20,  1.88it/s]

Training_loss 6.04155


 87%|████████▋ | 1737/2000 [15:32<02:16,  1.92it/s]

Training_loss 6.03813


 87%|████████▋ | 1738/2000 [15:32<02:16,  1.92it/s]

Training_loss 6.03302


 87%|████████▋ | 1739/2000 [15:33<02:14,  1.94it/s]

Training_loss 6.02831


 87%|████████▋ | 1740/2000 [15:33<02:09,  2.01it/s]

Training_loss 6.02691


 87%|████████▋ | 1741/2000 [15:33<01:54,  2.26it/s]

Training_loss 6.02487


 87%|████████▋ | 1742/2000 [15:34<01:56,  2.22it/s]

Training_loss 6.02285


 87%|████████▋ | 1743/2000 [15:34<01:52,  2.29it/s]

Training_loss 6.01884


 87%|████████▋ | 1744/2000 [15:35<01:48,  2.35it/s]

Training_loss 6.01634


 87%|████████▋ | 1745/2000 [15:35<02:03,  2.07it/s]

Training_loss 6.01342


 87%|████████▋ | 1746/2000 [15:36<01:59,  2.12it/s]

Training_loss 6.00999


 87%|████████▋ | 1747/2000 [15:36<02:07,  1.98it/s]

Training_loss 6.00547


 87%|████████▋ | 1748/2000 [15:37<02:12,  1.90it/s]

Training_loss 6.00095


 87%|████████▋ | 1749/2000 [15:38<02:18,  1.82it/s]

Training_loss 5.99650


 88%|████████▊ | 1750/2000 [15:38<02:15,  1.84it/s]

Training_loss 5.99279


 88%|████████▊ | 1751/2000 [15:39<02:15,  1.84it/s]

Training_loss 5.98696


 88%|████████▊ | 1752/2000 [15:39<02:11,  1.89it/s]

Training_loss 5.98473


 88%|████████▊ | 1753/2000 [15:40<02:11,  1.87it/s]

Training_loss 5.98114


 88%|████████▊ | 1754/2000 [15:40<02:09,  1.90it/s]

Training_loss 5.97729


 88%|████████▊ | 1755/2000 [15:41<02:08,  1.90it/s]

Training_loss 5.97541


 88%|████████▊ | 1756/2000 [15:41<02:08,  1.91it/s]

Training_loss 5.97407


 88%|████████▊ | 1757/2000 [15:42<02:06,  1.92it/s]

Training_loss 5.97138


 88%|████████▊ | 1758/2000 [15:42<02:08,  1.89it/s]

Training_loss 5.96939


 88%|████████▊ | 1759/2000 [15:43<02:07,  1.89it/s]

Training_loss 5.96434


 88%|████████▊ | 1760/2000 [15:43<02:01,  1.97it/s]

Training_loss 5.96315


 88%|████████▊ | 1761/2000 [15:44<01:59,  2.00it/s]

Training_loss 5.96008


 88%|████████▊ | 1762/2000 [15:44<02:02,  1.94it/s]

Training_loss 5.95916


 88%|████████▊ | 1763/2000 [15:45<02:08,  1.85it/s]

Training_loss 5.95484


 88%|████████▊ | 1764/2000 [15:45<02:10,  1.81it/s]

Training_loss 5.95245


 88%|████████▊ | 1765/2000 [15:46<02:13,  1.76it/s]

Training_loss 5.94935


 88%|████████▊ | 1766/2000 [15:47<02:11,  1.78it/s]

Training_loss 5.94856


 88%|████████▊ | 1767/2000 [15:47<02:05,  1.85it/s]

Training_loss 5.94743


 88%|████████▊ | 1768/2000 [15:48<02:03,  1.87it/s]

Training_loss 5.94543


 88%|████████▊ | 1769/2000 [15:48<02:03,  1.87it/s]

Training_loss 5.94423


 88%|████████▊ | 1770/2000 [15:49<02:05,  1.84it/s]

Training_loss 5.93966


 89%|████████▊ | 1771/2000 [15:49<02:00,  1.90it/s]

Training_loss 5.93666


 89%|████████▊ | 1772/2000 [15:50<01:48,  2.11it/s]

Training_loss 5.93118


 89%|████████▊ | 1773/2000 [15:50<01:46,  2.13it/s]

Training_loss 5.93012


 89%|████████▊ | 1774/2000 [15:50<01:45,  2.15it/s]

Training_loss 5.92735


 89%|████████▉ | 1775/2000 [15:51<01:34,  2.38it/s]

Training_loss 5.92499


 89%|████████▉ | 1776/2000 [15:51<01:39,  2.24it/s]

Training_loss 5.92336


 89%|████████▉ | 1777/2000 [15:52<01:44,  2.13it/s]

Training_loss 5.92146


 89%|████████▉ | 1778/2000 [15:52<01:47,  2.07it/s]

Training_loss 5.91728


 89%|████████▉ | 1779/2000 [15:53<01:45,  2.10it/s]

Training_loss 5.91693


 89%|████████▉ | 1780/2000 [15:53<01:48,  2.03it/s]

Training_loss 5.91247


 89%|████████▉ | 1781/2000 [15:54<01:49,  1.99it/s]

Training_loss 5.91287


 89%|████████▉ | 1782/2000 [15:54<01:50,  1.98it/s]

Training_loss 5.91322


 89%|████████▉ | 1783/2000 [15:55<01:49,  1.99it/s]

Training_loss 5.90736


 89%|████████▉ | 1784/2000 [15:55<01:47,  2.01it/s]

Training_loss 5.90457


 89%|████████▉ | 1785/2000 [15:56<01:46,  2.02it/s]

Training_loss 5.90070


 89%|████████▉ | 1786/2000 [15:56<01:47,  1.99it/s]

Training_loss 5.90241


 89%|████████▉ | 1787/2000 [15:57<01:49,  1.95it/s]

Training_loss 5.90023


 89%|████████▉ | 1788/2000 [15:57<01:49,  1.94it/s]

Training_loss 5.89837


 89%|████████▉ | 1789/2000 [15:58<01:52,  1.88it/s]

Training_loss 5.89589


 90%|████████▉ | 1790/2000 [15:58<01:50,  1.90it/s]

Training_loss 5.89492


 90%|████████▉ | 1791/2000 [15:59<01:47,  1.94it/s]

Training_loss 5.89095


 90%|████████▉ | 1792/2000 [15:59<01:47,  1.93it/s]

Training_loss 5.89046


 90%|████████▉ | 1793/2000 [16:00<01:47,  1.92it/s]

Training_loss 5.89035


 90%|████████▉ | 1794/2000 [16:01<01:50,  1.87it/s]

Training_loss 5.88759


 90%|████████▉ | 1795/2000 [16:01<01:46,  1.92it/s]

Training_loss 5.88527


 90%|████████▉ | 1796/2000 [16:02<01:46,  1.92it/s]

Training_loss 5.88297


 90%|████████▉ | 1797/2000 [16:02<01:45,  1.92it/s]

Training_loss 5.87849


 90%|████████▉ | 1798/2000 [16:03<01:46,  1.90it/s]

Training_loss 5.87118


 90%|████████▉ | 1799/2000 [16:03<01:45,  1.90it/s]

Training_loss 5.86622


 90%|█████████ | 1800/2000 [16:04<01:49,  1.83it/s]

Training_loss 5.86382


 90%|█████████ | 1801/2000 [16:04<01:47,  1.84it/s]

Training_loss 5.86401


 90%|█████████ | 1802/2000 [16:05<01:46,  1.87it/s]

Training_loss 5.86180


 90%|█████████ | 1803/2000 [16:05<01:42,  1.92it/s]

Training_loss 5.86292


 90%|█████████ | 1804/2000 [16:06<01:31,  2.14it/s]

Training_loss 5.86290


 90%|█████████ | 1805/2000 [16:06<01:32,  2.12it/s]

Training_loss 5.85864


 90%|█████████ | 1806/2000 [16:07<01:31,  2.12it/s]

Training_loss 5.85842


 90%|█████████ | 1807/2000 [16:07<01:28,  2.17it/s]

Training_loss 5.85746


 90%|█████████ | 1808/2000 [16:08<01:34,  2.04it/s]

Training_loss 5.85131


 90%|█████████ | 1809/2000 [16:08<01:37,  1.96it/s]

Training_loss 5.84365


 90%|█████████ | 1810/2000 [16:09<01:32,  2.04it/s]

Training_loss 5.84486


 91%|█████████ | 1811/2000 [16:09<01:34,  1.99it/s]

Training_loss 5.84558


 91%|█████████ | 1812/2000 [16:10<01:30,  2.08it/s]

Training_loss 5.84017


 91%|█████████ | 1813/2000 [16:10<01:27,  2.14it/s]

Training_loss 5.83665


 91%|█████████ | 1814/2000 [16:10<01:26,  2.14it/s]

Training_loss 5.83143


 91%|█████████ | 1815/2000 [16:11<01:23,  2.22it/s]

Training_loss 5.83136


 91%|█████████ | 1816/2000 [16:11<01:23,  2.20it/s]

Training_loss 5.82856


 91%|█████████ | 1817/2000 [16:12<01:23,  2.19it/s]

Training_loss 5.82603


 91%|█████████ | 1818/2000 [16:12<01:21,  2.25it/s]

Training_loss 5.81902


 91%|█████████ | 1819/2000 [16:13<01:21,  2.22it/s]

Training_loss 5.81650


 91%|█████████ | 1820/2000 [16:13<01:15,  2.38it/s]

Training_loss 5.81461


 91%|█████████ | 1821/2000 [16:13<01:15,  2.38it/s]

Training_loss 5.81236


 91%|█████████ | 1822/2000 [16:14<01:14,  2.40it/s]

Training_loss 5.80617


 91%|█████████ | 1823/2000 [16:14<01:11,  2.47it/s]

Training_loss 5.80520


 91%|█████████ | 1824/2000 [16:15<01:12,  2.43it/s]

Training_loss 5.80100


 91%|█████████▏| 1825/2000 [16:15<01:12,  2.41it/s]

Training_loss 5.79829


 91%|█████████▏| 1826/2000 [16:15<01:11,  2.42it/s]

Training_loss 5.79590


 91%|█████████▏| 1827/2000 [16:16<01:11,  2.40it/s]

Training_loss 5.79166


 91%|█████████▏| 1828/2000 [16:16<01:11,  2.40it/s]

Training_loss 5.78714


 91%|█████████▏| 1829/2000 [16:17<01:12,  2.37it/s]

Training_loss 5.78289


 92%|█████████▏| 1830/2000 [16:17<01:13,  2.31it/s]

Training_loss 5.78280


 92%|█████████▏| 1831/2000 [16:18<01:12,  2.32it/s]

Training_loss 5.77940


 92%|█████████▏| 1832/2000 [16:18<01:10,  2.38it/s]

Training_loss 5.77493


 92%|█████████▏| 1833/2000 [16:18<01:08,  2.43it/s]

Training_loss 5.77178


 92%|█████████▏| 1834/2000 [16:19<01:09,  2.38it/s]

Training_loss 5.76759


 92%|█████████▏| 1835/2000 [16:19<01:10,  2.34it/s]

Training_loss 5.76618


 92%|█████████▏| 1836/2000 [16:20<01:09,  2.35it/s]

Training_loss 5.76408


 92%|█████████▏| 1837/2000 [16:20<01:10,  2.31it/s]

Training_loss 5.76264


 92%|█████████▏| 1838/2000 [16:21<01:09,  2.33it/s]

Training_loss 5.76081


 92%|█████████▏| 1839/2000 [16:21<01:08,  2.34it/s]

Training_loss 5.75829


 92%|█████████▏| 1840/2000 [16:21<01:03,  2.53it/s]

Training_loss 5.75221


 92%|█████████▏| 1841/2000 [16:22<00:56,  2.83it/s]

Training_loss 5.74930


 92%|█████████▏| 1842/2000 [16:22<00:54,  2.92it/s]

Training_loss 5.74866


 92%|█████████▏| 1843/2000 [16:22<00:50,  3.09it/s]

Training_loss 5.74463


 92%|█████████▏| 1844/2000 [16:23<00:49,  3.15it/s]

Training_loss 5.74214


 92%|█████████▏| 1845/2000 [16:23<00:51,  3.03it/s]

Training_loss 5.73974


 92%|█████████▏| 1846/2000 [16:23<00:54,  2.84it/s]

Training_loss 5.73976


 92%|█████████▏| 1847/2000 [16:24<00:59,  2.56it/s]

Training_loss 5.73517


 92%|█████████▏| 1848/2000 [16:24<01:01,  2.48it/s]

Training_loss 5.73169


 92%|█████████▏| 1849/2000 [16:25<01:04,  2.35it/s]

Training_loss 5.72893


 92%|█████████▎| 1850/2000 [16:25<01:03,  2.37it/s]

Training_loss 5.72579


 93%|█████████▎| 1851/2000 [16:25<00:59,  2.49it/s]

Training_loss 5.72350


 93%|█████████▎| 1852/2000 [16:26<01:02,  2.36it/s]

Training_loss 5.71967


 93%|█████████▎| 1853/2000 [16:26<01:00,  2.41it/s]

Training_loss 5.71675


 93%|█████████▎| 1854/2000 [16:27<01:00,  2.42it/s]

Training_loss 5.71431


 93%|█████████▎| 1855/2000 [16:27<00:59,  2.43it/s]

Training_loss 5.71086


 93%|█████████▎| 1856/2000 [16:28<00:58,  2.46it/s]

Training_loss 5.70752


 93%|█████████▎| 1857/2000 [16:28<01:00,  2.38it/s]

Training_loss 5.70553


 93%|█████████▎| 1858/2000 [16:28<01:00,  2.34it/s]

Training_loss 5.70018


 93%|█████████▎| 1859/2000 [16:29<01:00,  2.34it/s]

Training_loss 5.69913


 93%|█████████▎| 1860/2000 [16:29<00:58,  2.40it/s]

Training_loss 5.69687


 93%|█████████▎| 1861/2000 [16:30<00:57,  2.41it/s]

Training_loss 5.69581


 93%|█████████▎| 1862/2000 [16:30<00:56,  2.43it/s]

Training_loss 5.69150


 93%|█████████▎| 1863/2000 [16:31<00:59,  2.30it/s]

Training_loss 5.68854


 93%|█████████▎| 1864/2000 [16:31<00:58,  2.33it/s]

Training_loss 5.68618


 93%|█████████▎| 1865/2000 [16:31<00:57,  2.35it/s]

Training_loss 5.68460


 93%|█████████▎| 1866/2000 [16:32<00:55,  2.43it/s]

Training_loss 5.68188


 93%|█████████▎| 1867/2000 [16:32<00:54,  2.43it/s]

Training_loss 5.67910


 93%|█████████▎| 1868/2000 [16:33<00:58,  2.26it/s]

Training_loss 5.67811


 93%|█████████▎| 1869/2000 [16:33<00:57,  2.29it/s]

Training_loss 5.67447


 94%|█████████▎| 1870/2000 [16:33<00:55,  2.34it/s]

Training_loss 5.67183


 94%|█████████▎| 1871/2000 [16:34<00:55,  2.33it/s]

Training_loss 5.66790


 94%|█████████▎| 1872/2000 [16:34<00:54,  2.34it/s]

Training_loss 5.66613


 94%|█████████▎| 1873/2000 [16:35<00:54,  2.33it/s]

Training_loss 5.66525


 94%|█████████▎| 1874/2000 [16:35<00:51,  2.43it/s]

Training_loss 5.66164


 94%|█████████▍| 1875/2000 [16:36<00:51,  2.42it/s]

Training_loss 5.65972


 94%|█████████▍| 1876/2000 [16:36<00:51,  2.42it/s]

Training_loss 5.65309


 94%|█████████▍| 1877/2000 [16:36<00:51,  2.38it/s]

Training_loss 5.65128


 94%|█████████▍| 1878/2000 [16:37<00:53,  2.28it/s]

Training_loss 5.64963


 94%|█████████▍| 1879/2000 [16:37<00:49,  2.44it/s]

Training_loss 5.64978


 94%|█████████▍| 1880/2000 [16:37<00:42,  2.80it/s]

Training_loss 5.64774


 94%|█████████▍| 1882/2000 [16:38<00:35,  3.30it/s]

Training_loss 5.64633
Training_loss 5.64261


 94%|█████████▍| 1883/2000 [16:38<00:34,  3.41it/s]

Training_loss 5.64011


 94%|█████████▍| 1884/2000 [16:39<00:34,  3.38it/s]

Training_loss 5.63694


 94%|█████████▍| 1885/2000 [16:39<00:41,  2.80it/s]

Training_loss 5.63620


 94%|█████████▍| 1886/2000 [16:39<00:41,  2.77it/s]

Training_loss 5.63498


 94%|█████████▍| 1887/2000 [16:40<00:43,  2.62it/s]

Training_loss 5.63332


 94%|█████████▍| 1888/2000 [16:40<00:43,  2.57it/s]

Training_loss 5.62881


 94%|█████████▍| 1889/2000 [16:41<00:46,  2.40it/s]

Training_loss 5.62726


 94%|█████████▍| 1890/2000 [16:41<00:44,  2.45it/s]

Training_loss 5.62479


 95%|█████████▍| 1891/2000 [16:42<00:45,  2.37it/s]

Training_loss 5.62279


 95%|█████████▍| 1892/2000 [16:42<00:44,  2.43it/s]

Training_loss 5.61909


 95%|█████████▍| 1893/2000 [16:42<00:42,  2.52it/s]

Training_loss 5.61682


 95%|█████████▍| 1894/2000 [16:43<00:42,  2.48it/s]

Training_loss 5.61560


 95%|█████████▍| 1895/2000 [16:43<00:42,  2.50it/s]

Training_loss 5.61090


 95%|█████████▍| 1896/2000 [16:44<00:43,  2.38it/s]

Training_loss 5.60687


 95%|█████████▍| 1897/2000 [16:44<00:43,  2.39it/s]

Training_loss 5.60323


 95%|█████████▍| 1898/2000 [16:44<00:41,  2.45it/s]

Training_loss 5.60307


 95%|█████████▍| 1899/2000 [16:45<00:41,  2.42it/s]

Training_loss 5.59868


 95%|█████████▌| 1900/2000 [16:45<00:40,  2.47it/s]

Training_loss 5.59812


 95%|█████████▌| 1901/2000 [16:46<00:41,  2.39it/s]

Training_loss 5.59641


 95%|█████████▌| 1902/2000 [16:46<00:44,  2.19it/s]

Training_loss 5.59381


 95%|█████████▌| 1903/2000 [16:47<00:47,  2.05it/s]

Training_loss 5.59099


 95%|█████████▌| 1904/2000 [16:47<00:44,  2.18it/s]

Training_loss 5.59108


 95%|█████████▌| 1905/2000 [16:48<00:42,  2.23it/s]

Training_loss 5.58586


 95%|█████████▌| 1906/2000 [16:48<00:41,  2.26it/s]

Training_loss 5.58368


 95%|█████████▌| 1907/2000 [16:49<00:42,  2.19it/s]

Training_loss 5.58326


 95%|█████████▌| 1908/2000 [16:49<00:43,  2.09it/s]

Training_loss 5.58103


 95%|█████████▌| 1909/2000 [16:50<00:42,  2.14it/s]

Training_loss 5.57989


 96%|█████████▌| 1910/2000 [16:50<00:41,  2.16it/s]

Training_loss 5.57728


 96%|█████████▌| 1911/2000 [16:50<00:40,  2.18it/s]

Training_loss 5.57507


 96%|█████████▌| 1912/2000 [16:51<00:39,  2.22it/s]

Training_loss 5.57216


 96%|█████████▌| 1913/2000 [16:51<00:38,  2.28it/s]

Training_loss 5.57029


 96%|█████████▌| 1914/2000 [16:52<00:36,  2.37it/s]

Training_loss 5.56764


 96%|█████████▌| 1915/2000 [16:52<00:36,  2.35it/s]

Training_loss 5.56706


 96%|█████████▌| 1916/2000 [16:53<00:36,  2.28it/s]

Training_loss 5.56528


 96%|█████████▌| 1917/2000 [16:53<00:36,  2.30it/s]

Training_loss 5.56030


 96%|█████████▌| 1918/2000 [16:53<00:35,  2.32it/s]

Training_loss 5.55606


 96%|█████████▌| 1919/2000 [16:54<00:32,  2.46it/s]

Training_loss 5.55418


 96%|█████████▌| 1920/2000 [16:54<00:31,  2.54it/s]

Training_loss 5.55141


 96%|█████████▌| 1921/2000 [16:55<00:31,  2.48it/s]

Training_loss 5.54834


 96%|█████████▌| 1922/2000 [16:55<00:32,  2.42it/s]

Training_loss 5.54702


 96%|█████████▌| 1923/2000 [16:55<00:33,  2.30it/s]

Training_loss 5.54580


 96%|█████████▌| 1924/2000 [16:56<00:35,  2.16it/s]

Training_loss 5.54285


 96%|█████████▋| 1925/2000 [16:56<00:35,  2.13it/s]

Training_loss 5.53908


 96%|█████████▋| 1926/2000 [16:57<00:35,  2.08it/s]

Training_loss 5.53608


 96%|█████████▋| 1927/2000 [16:57<00:35,  2.03it/s]

Training_loss 5.53273


 96%|█████████▋| 1928/2000 [16:58<00:36,  1.97it/s]

Training_loss 5.52940


 96%|█████████▋| 1929/2000 [16:58<00:33,  2.14it/s]

Training_loss 5.52653


 96%|█████████▋| 1930/2000 [16:59<00:32,  2.17it/s]

Training_loss 5.52418


 97%|█████████▋| 1931/2000 [16:59<00:31,  2.21it/s]

Training_loss 5.52179


 97%|█████████▋| 1932/2000 [17:00<00:30,  2.24it/s]

Training_loss 5.52127


 97%|█████████▋| 1933/2000 [17:00<00:29,  2.30it/s]

Training_loss 5.51829


 97%|█████████▋| 1934/2000 [17:01<00:28,  2.33it/s]

Training_loss 5.51878


 97%|█████████▋| 1935/2000 [17:01<00:27,  2.33it/s]

Training_loss 5.51657


 97%|█████████▋| 1936/2000 [17:01<00:28,  2.24it/s]

Training_loss 5.51706


 97%|█████████▋| 1937/2000 [17:02<00:27,  2.29it/s]

Training_loss 5.51345


 97%|█████████▋| 1938/2000 [17:02<00:26,  2.36it/s]

Training_loss 5.51240


 97%|█████████▋| 1939/2000 [17:03<00:25,  2.38it/s]

Training_loss 5.51060


 97%|█████████▋| 1940/2000 [17:03<00:25,  2.38it/s]

Training_loss 5.50711


 97%|█████████▋| 1941/2000 [17:04<00:24,  2.37it/s]

Training_loss 5.50467


 97%|█████████▋| 1942/2000 [17:04<00:24,  2.41it/s]

Training_loss 5.50234


 97%|█████████▋| 1943/2000 [17:04<00:23,  2.43it/s]

Training_loss 5.49854


 97%|█████████▋| 1944/2000 [17:05<00:22,  2.46it/s]

Training_loss 5.49509


 97%|█████████▋| 1945/2000 [17:05<00:23,  2.38it/s]

Training_loss 5.49483


 97%|█████████▋| 1946/2000 [17:06<00:22,  2.35it/s]

Training_loss 5.49304


 97%|█████████▋| 1947/2000 [17:06<00:22,  2.36it/s]

Training_loss 5.49031


 97%|█████████▋| 1948/2000 [17:06<00:21,  2.41it/s]

Training_loss 5.48786


 97%|█████████▋| 1949/2000 [17:07<00:20,  2.49it/s]

Training_loss 5.48695


 98%|█████████▊| 1950/2000 [17:07<00:19,  2.51it/s]

Training_loss 5.48474


 98%|█████████▊| 1951/2000 [17:08<00:19,  2.49it/s]

Training_loss 5.48495


 98%|█████████▊| 1952/2000 [17:08<00:19,  2.49it/s]

Training_loss 5.48155


 98%|█████████▊| 1953/2000 [17:08<00:19,  2.46it/s]

Training_loss 5.48049


 98%|█████████▊| 1954/2000 [17:09<00:19,  2.36it/s]

Training_loss 5.48244


 98%|█████████▊| 1956/2000 [17:09<00:15,  2.87it/s]

Training_loss 5.48171
Training_loss 5.48020


 98%|█████████▊| 1957/2000 [17:10<00:15,  2.81it/s]

Training_loss 5.47570


 98%|█████████▊| 1958/2000 [17:10<00:14,  2.92it/s]

Training_loss 5.47658


 98%|█████████▊| 1959/2000 [17:10<00:13,  3.14it/s]

Training_loss 5.47476


 98%|█████████▊| 1960/2000 [17:11<00:12,  3.30it/s]

Training_loss 5.47412


 98%|█████████▊| 1961/2000 [17:11<00:13,  2.84it/s]

Training_loss 5.47168


 98%|█████████▊| 1962/2000 [17:12<00:14,  2.70it/s]

Training_loss 5.46606


 98%|█████████▊| 1963/2000 [17:12<00:13,  2.66it/s]

Training_loss 5.46403


 98%|█████████▊| 1964/2000 [17:12<00:13,  2.57it/s]

Training_loss 5.46487


 98%|█████████▊| 1965/2000 [17:13<00:13,  2.55it/s]

Training_loss 5.46474


 98%|█████████▊| 1966/2000 [17:13<00:13,  2.50it/s]

Training_loss 5.45869


 98%|█████████▊| 1967/2000 [17:14<00:13,  2.49it/s]

Training_loss 5.45404


 98%|█████████▊| 1968/2000 [17:14<00:13,  2.41it/s]

Training_loss 5.44775


 98%|█████████▊| 1969/2000 [17:14<00:12,  2.44it/s]

Training_loss 5.44926


 98%|█████████▊| 1970/2000 [17:15<00:12,  2.38it/s]

Training_loss 5.44667


 99%|█████████▊| 1971/2000 [17:15<00:12,  2.37it/s]

Training_loss 5.44148


 99%|█████████▊| 1972/2000 [17:16<00:11,  2.35it/s]

Training_loss 5.43814


 99%|█████████▊| 1973/2000 [17:16<00:11,  2.44it/s]

Training_loss 5.43324


 99%|█████████▊| 1974/2000 [17:16<00:10,  2.50it/s]

Training_loss 5.43266


 99%|█████████▉| 1975/2000 [17:17<00:09,  2.51it/s]

Training_loss 5.43235


 99%|█████████▉| 1976/2000 [17:17<00:09,  2.46it/s]

Training_loss 5.43006


 99%|█████████▉| 1977/2000 [17:18<00:09,  2.43it/s]

Training_loss 5.42801


 99%|█████████▉| 1978/2000 [17:18<00:09,  2.41it/s]

Training_loss 5.42674


 99%|█████████▉| 1979/2000 [17:19<00:08,  2.39it/s]

Training_loss 5.42425


 99%|█████████▉| 1980/2000 [17:19<00:08,  2.39it/s]

Training_loss 5.42194


 99%|█████████▉| 1981/2000 [17:19<00:07,  2.42it/s]

Training_loss 5.41897


 99%|█████████▉| 1982/2000 [17:20<00:07,  2.35it/s]

Training_loss 5.41752


 99%|█████████▉| 1983/2000 [17:20<00:07,  2.41it/s]

Training_loss 5.41739


 99%|█████████▉| 1984/2000 [17:21<00:06,  2.46it/s]

Training_loss 5.41291


 99%|█████████▉| 1985/2000 [17:21<00:06,  2.40it/s]

Training_loss 5.41004


 99%|█████████▉| 1986/2000 [17:21<00:05,  2.39it/s]

Training_loss 5.40578


 99%|█████████▉| 1987/2000 [17:22<00:05,  2.42it/s]

Training_loss 5.40084


 99%|█████████▉| 1988/2000 [17:22<00:05,  2.36it/s]

Training_loss 5.39873


 99%|█████████▉| 1989/2000 [17:23<00:04,  2.44it/s]

Training_loss 5.39716


100%|█████████▉| 1990/2000 [17:23<00:04,  2.40it/s]

Training_loss 5.39331


100%|█████████▉| 1991/2000 [17:24<00:03,  2.50it/s]

Training_loss 5.39296


100%|█████████▉| 1992/2000 [17:24<00:03,  2.45it/s]

Training_loss 5.39192


100%|█████████▉| 1993/2000 [17:24<00:02,  2.38it/s]

Training_loss 5.39093


100%|█████████▉| 1994/2000 [17:25<00:02,  2.39it/s]

Training_loss 5.38919


100%|█████████▉| 1995/2000 [17:25<00:02,  2.43it/s]

Training_loss 5.38822


100%|█████████▉| 1996/2000 [17:25<00:01,  2.66it/s]

Training_loss 5.38679


100%|█████████▉| 1997/2000 [17:26<00:01,  2.87it/s]

Training_loss 5.38565


100%|█████████▉| 1998/2000 [17:26<00:00,  2.85it/s]

Training_loss 5.38601


100%|██████████| 2000/2000 [17:27<00:00,  3.20it/s]

Training_loss 5.38063
Training_loss 5.37818


100%|██████████| 2000/2000 [17:27<00:00,  1.91it/s]


In [22]:
#Training_loss 5.33078 with no communication

In [23]:
#plot.plot(test_loss)
parameters_to_vector(models[19].parameters())

tensor([ 5.7469e-01, -6.6127e-02,  1.1013e+00, -1.1763e-01,  2.5120e-01,
        -1.6853e-01,  2.5237e-01, -3.4585e-01,  1.9780e-01, -2.0594e-01,
         6.3457e-01, -5.4500e-02,  2.2677e-01, -1.5153e-03,  1.9216e-01,
         3.7020e-01,  1.1849e-01, -1.8998e+00,  3.4578e-01,  7.1996e-02,
         4.6702e-01,  7.6325e-01, -6.2360e-01, -4.1334e-01, -4.8626e-01,
         3.9331e-01,  4.0774e-01,  4.7950e-01,  4.6535e-01, -1.0426e+00,
        -2.1749e-01,  6.8207e-01, -8.9210e-01,  6.8191e-01,  1.4875e+00,
         6.0472e-01,  1.2912e+00,  2.0462e+00,  1.3082e+00,  2.3663e+00],
       grad_fn=<CatBackward0>)

In [24]:
for j in G.neighbors(0):
    print(j)

3
4
8
9
13
19


In [25]:
parameters_to_vector(models[0].parameters())

tensor([-0.6882,  0.3561, -0.3982, -0.1066, -0.3581, -0.7024, -0.4494, -0.1482,
        -0.1098,  0.4188, -2.1072,  0.4426, -0.4472,  0.4188, -0.3473,  0.4734,
         0.5899, -0.4091, -1.5525, -0.0772,  0.2948,  0.3083,  0.2117, -0.0732,
         0.3195,  1.3334,  0.5180, -0.0885, -0.0948, -0.1414,  0.0878,  0.2232,
         0.1546, -0.2576, -0.0959, -0.2079,  1.1979,  2.4298,  2.1192,  0.2582],
       grad_fn=<CatBackward0>)

In [26]:
projection_list[0]

[0,
 0,
 0,
 tensor([[ 1.1336, -0.0089,  0.0065,  ..., -0.0275, -0.0318,  0.0065],
         [ 0.0083,  1.1164,  0.0065,  ..., -0.0275, -0.0318,  0.0065],
         [ 0.0083, -0.0089,  1.1319,  ..., -0.0275, -0.0318,  0.0065],
         ...,
         [ 0.0083, -0.0089,  0.0065,  ...,  1.0979, -0.0318,  0.0065],
         [ 0.0083, -0.0089,  0.0065,  ..., -0.0275,  1.0936,  0.0065],
         [ 0.0083, -0.0089,  0.0065,  ..., -0.0275, -0.0318,  1.1318]]),
 tensor([[ 1.1112, -0.0089,  0.0065,  ..., -0.0275, -0.0318,  0.0065],
         [ 0.0083,  1.0940,  0.0065,  ..., -0.0275, -0.0318,  0.0065],
         [ 0.0083, -0.0089,  1.1095,  ..., -0.0275, -0.0318,  0.0065],
         ...,
         [ 0.0083, -0.0089,  0.0065,  ...,  1.0754, -0.0318,  0.0065],
         [ 0.0083, -0.0089,  0.0065,  ..., -0.0275,  1.0711,  0.0065],
         [ 0.0083, -0.0089,  0.0065,  ..., -0.0275, -0.0318,  1.1094]]),
 0,
 0,
 0,
 tensor([[ 2.5852, -0.0089,  0.0065,  ..., -0.0275, -0.0318,  0.0065],
         [ 0.0083,  2

In [27]:
projected_weights[0]

[0,
 0,
 0,
 tensor([ 0.6434, -3.3085,  0.6246, -0.4497,  0.5906, -0.9084,  0.5327,  1.0913,
         -1.2100, -2.6651,  0.5632, -0.0314,  0.1215,  0.2904, -0.6331,  0.2643,
          1.6859,  0.4560,  0.1871, -0.1055, -0.1291,  0.1626, -0.3849, -0.2996,
          0.0417, -0.2084, -0.1940, -0.2122, -0.0903, -0.2500, -1.0773, -1.0703,
         -0.2790, -0.5134, -0.4243,  0.6511,  3.7782,  3.1960, -0.5023,  1.2676]),
 tensor([-2.3511, -0.0612, -0.3928, -0.2778, -0.3649, -1.1254, -0.5070,  1.6262,
         -0.5698,  0.7606, -3.1166,  0.5179, -0.2659, -0.2225, -1.0041,  0.4747,
          0.4901, -1.3377, -0.4538,  0.1384, -0.6638,  0.0543, -0.1453, -0.1051,
          0.3705, -0.3889, -0.7806, -1.6693, -0.4916, -0.0236,  0.1356,  0.4831,
         -0.5466,  0.3297,  0.1296,  1.4769,  2.6333,  3.2943,  0.6665,  2.1412]),
 0,
 0,
 0,
 tensor([-0.4932,  0.2572, -0.9071, -0.8343, -0.1674, -0.3993, -0.5750, -0.9374,
          1.1747, -0.0093, -0.0869, -0.1278, -0.2076, -0.4064, -0.1622,  0.0241,


In [28]:
test_loss = np.array(test_loss)
total_rel_error = np.array(total_rel_error)

In [29]:
print(test_loss)

[52.58379635 52.50533195 52.45319832 ...  5.38600981  5.38062725
  5.37818469]


In [30]:
np.save( 'training_loss_sheave_fml_lambda' + str(lamda).replace('.', '_') + '_pout' + str(pout).replace('.', '_'), test_loss)
#np.save('relative_error_sheave_fml' + str(lamda).replace('.', '_'), total_rel_error)

In [31]:
'training_loss_sheave_fml' + str(lamda).replace('.', '_'), test_loss

('training_loss_sheave_fml0_001',
 array([52.58379635, 52.50533195, 52.45319832, ...,  5.38600981,
         5.38062725,  5.37818469]))