In [1]:
from keras.utils import to_categorical
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Dataset
import copy
import matplotlib.pyplot as plt
from tqdm import tqdm
import networkx as nx
from torch.nn.utils import parameters_to_vector, vector_to_parameters
import networkx as nx

In [2]:
from sklearn.datasets import load_svmlight_file


def get_data(data):
    data = load_svmlight_file(data)
    return data[0].toarray(), data[1]
data = "./a1a.txt"
X, y = get_data(data)
y[y == -1] = 0
# Convert labels to one-hot encoding
y = to_categorical(y)

data = "./a1a_t"
X_test,y_test = get_data(data)
y_test[y_test == -1] = 0
y_test = to_categorical(y_test)

In [3]:
y[y[:, 0] == 1].shape
X.shape

(1605, 119)

In [4]:
# Dataset partitioning
def random_split(X, y, n, seed):
    """Equally split data between n agents"""
    rng = np.random.default_rng(seed)
    perm = rng.permutation(y.shape[0])
    X_split = np.array_split(X[perm], n)  #np.stack to keep as a np array
    y_split = np.array_split(y[perm], n)
    return X_split, y_split

In [5]:
no_users = 30

In [6]:
X, y = random_split(X, y, 3, 42)

In [7]:
X1 = X[0][:, 0:50]
X2 = X[1][:, 30: 90]
X3 = X[2][:, 20:]
y1 = y[0]
y2 = y[1]
y3 = y[2]
subset_ranges = [np.arange(0, 50), np.arange(30, 90), np.arange(20, 119)]
subset_lengths = [subset_ranges[0].shape[0], subset_ranges[1].shape[0], subset_ranges[2].shape[0]]

In [8]:
# Graph implementation
def generate_graph(cluster_sizes=[100,100], pin=0.5, pout=0.01, seed=0):
    """Generate a random connected graph"""
    probs = np.array([[pin, pout, 5 * pout],[pout, pin, 5 * pout],[5 * pout, 5 * pout, pin]])
    while True:
        g = nx.stochastic_block_model(cluster_sizes, probs, seed=0)
        if nx.algorithms.components.is_connected(g):
            return g


cluster_sizes = [10, 10, 10]
features_sizes = [8, 7, 6, 5]
pin = 0.5
pout = 0.1
seed = 0
alpha = 1e-2
lamda = 0#1e-2#1e-3
eta = 1e-2
d0 = min(subset_lengths)
no_users = sum(cluster_sizes)
batch_size = 10
epochs = 1
it = 2000
G = generate_graph(cluster_sizes, pin, pout, seed)

# Set a random seed for reproducibility
seed = 17
torch.manual_seed(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(seed)
#nx.draw(G, with_labels=True, node_size=100, alpha=1, linewidths=10)
#plt.show()

In [9]:
def degrees(A):
    """Return the degrees of each node of a graph from its adjacency matrix"""
    return np.sum(A, axis=0).reshape(A.shape[0], 1)

def node_degree(n, G):
    cnt = 0
    for i in G.neighbors(n):
        cnt += 1
    return cnt

def get_neighbors(n, G):
    neighbors_list = []
    for i in G.neighbors(n):
        neighbors_list.append(int(i))
    return neighbors_list

In [10]:
y1.shape

(535, 2)

In [11]:
datapoints = {}
count = 0

X1, y1 = random_split(X1, y1, 10, 42)
X2, y2 = random_split(X2, y2, 10, 42)
X3, y3 = random_split(X3, y3, 10, 42)

X_train = [X1, X2, X3]
y_train = [y1, y2, y3]
input_sizes = [X1[0].shape[1], X2[0].shape[1], X3[0].shape[1]]

for i, cluster_size in enumerate(cluster_sizes):
    for j in range(cluster_size):
        
        test_features = X_test[:, subset_ranges[i]]
        test_label = y_test
        datapoints[count] = {
                'features': X_train[i][j],
                'degree': node_degree(count, G),
                'label': y_train[i][j],
                'neighbors': get_neighbors(count, G),
                'input_size': X_train[i][j].shape[1],
                'test_features':test_features,
                'test_labels': test_label
            }
        count += 1

In [12]:
class MyDataset(Dataset):
    def __init__(self, data, targets, transform=None):
        self.data = torch.FloatTensor(data)
        self.targets = torch.FloatTensor(targets)
        
    def __getitem__(self, index):
        x = self.data[index]
        y = self.targets[index]

        return x, y
    
    def __len__(self):
        return len(self.data)


In [13]:

class MLP_Net(nn.Module):
    def __init__(self, input_size, num_classes, user_id):
        super(MLP_Net, self).__init__()
        self.fc1 = nn.Linear(input_size, num_classes, bias=True)
        self.user_id = user_id

    def forward(self, x):
        x = torch.flatten(x, 1)
        output = F.softmax(self.fc1(x), dim=1)  # Applying softmax along the second dimension
        return output

In [14]:
from typing import Iterable, Optional

def grads_to_vector(parameters: Iterable[torch.Tensor]) -> torch.Tensor:
    r"""Convert parameters to one vector

    Args:
        parameters (Iterable[Tensor]): an iterator of Tensors that are the
            parameters of a model.

    Returns:
        The parameters represented by a single vector
    """
    # Flag for the device where the parameter is located
    param_device = None

    vec = []
    for param in parameters:
        # Ensure the parameters are located in the same device
        param_device = param.grad

        vec.append(param_device.view(-1))
    return torch.cat(vec)

In [15]:
model = MLP_Net(datapoints[0]["input_size"], 2, user_id=0)

lr = 0.01

dataloader = DataLoader(MyDataset(datapoints[0]["features"], datapoints[0]["label"]), batch_size=20, shuffle=True)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
for i in range(1000):
    for j, (x, y) in zip(range(1), dataloader):
        criterion = nn.CrossEntropyLoss()
        optimizer.zero_grad()
        yhat = model(x)
        
        # Calculate accuracy
        _, predicted = torch.max(yhat, 1)
        _, predicted_true = torch.max(y, 1)
        correct_predictions = (predicted == predicted_true).sum().item()
         

        loss = criterion(yhat, y)
        
        loss.backward()
        print(i, loss.detach(), correct_predictions)
        #optimizer.step()
        new_model = parameters_to_vector(model.parameters()) - lr * grads_to_vector(model.parameters())
        vector_to_parameters(parameters=model.parameters(), vec=new_model)
        #if i % 50 ==0:
            #lr *= 0.9
            

#parameters_to_vector(model.parameters())

0 tensor(0.7243) 5
1 tensor(0.7168) 7
2 tensor(0.7072) 7
3 tensor(0.7037) 6
4 tensor(0.7381) 3
5 tensor(0.7233) 4
6 tensor(0.7124) 5
7 tensor(0.7149) 6
8 tensor(0.7011) 10
9 tensor(0.7044) 10
10 tensor(0.7113) 7
11 tensor(0.7167) 7
12 tensor(0.6863) 12
13 tensor(0.6950) 11
14 tensor(0.6973) 9
15 tensor(0.6853) 10
16 tensor(0.6938) 11
17 tensor(0.7039) 8
18 tensor(0.6882) 11
19 tensor(0.6918) 11
20 tensor(0.6919) 9
21 tensor(0.6995) 9
22 tensor(0.7058) 8
23 tensor(0.6918) 9
24 tensor(0.7008) 8
25 tensor(0.6934) 11
26 tensor(0.6974) 8
27 tensor(0.6901) 7
28 tensor(0.6792) 11
29 tensor(0.6794) 12
30 tensor(0.6853) 13
31 tensor(0.6725) 14
32 tensor(0.6809) 10
33 tensor(0.6903) 8
34 tensor(0.6936) 8
35 tensor(0.6877) 9
36 tensor(0.6837) 10
37 tensor(0.6899) 10
38 tensor(0.6755) 12
39 tensor(0.6671) 13
40 tensor(0.6928) 11
41 tensor(0.6868) 11
42 tensor(0.6961) 12
43 tensor(0.6730) 13
44 tensor(0.6791) 14
45 tensor(0.6768) 12
46 tensor(0.6698) 14
47 tensor(0.6851) 12
48 tensor(0.6689) 13
49 

403 tensor(0.5708) 15
404 tensor(0.5598) 15
405 tensor(0.6095) 13
406 tensor(0.5313) 16
407 tensor(0.5373) 17
408 tensor(0.5037) 17
409 tensor(0.5732) 14
410 tensor(0.5484) 15
411 tensor(0.5778) 14
412 tensor(0.5294) 16
413 tensor(0.4947) 18
414 tensor(0.5146) 16
415 tensor(0.4988) 17
416 tensor(0.5412) 16
417 tensor(0.5373) 16
418 tensor(0.5748) 14
419 tensor(0.5267) 16
420 tensor(0.5465) 16
421 tensor(0.5716) 14
422 tensor(0.5905) 14
423 tensor(0.6338) 12
424 tensor(0.5382) 16
425 tensor(0.5274) 17
426 tensor(0.5303) 16
427 tensor(0.5688) 15
428 tensor(0.5344) 16
429 tensor(0.5711) 14
430 tensor(0.5696) 14
431 tensor(0.5351) 16
432 tensor(0.5081) 16
433 tensor(0.5489) 15
434 tensor(0.4854) 18
435 tensor(0.5748) 14
436 tensor(0.5123) 17
437 tensor(0.6099) 13
438 tensor(0.5886) 14
439 tensor(0.5934) 13
440 tensor(0.5810) 14
441 tensor(0.5574) 14
442 tensor(0.5050) 17
443 tensor(0.4660) 19
444 tensor(0.5459) 15
445 tensor(0.6074) 13
446 tensor(0.5693) 14
447 tensor(0.6022) 13
448 tensor

832 tensor(0.4922) 16
833 tensor(0.5360) 15
834 tensor(0.5762) 13
835 tensor(0.5037) 16
836 tensor(0.5037) 17
837 tensor(0.4697) 17
838 tensor(0.5298) 15
839 tensor(0.5087) 16
840 tensor(0.4606) 18
841 tensor(0.5143) 16
842 tensor(0.5192) 16
843 tensor(0.4972) 17
844 tensor(0.5090) 16
845 tensor(0.4891) 17
846 tensor(0.5405) 15
847 tensor(0.4890) 17
848 tensor(0.4966) 17
849 tensor(0.5117) 16
850 tensor(0.5530) 15
851 tensor(0.4975) 17
852 tensor(0.5011) 17
853 tensor(0.4790) 17
854 tensor(0.5359) 15
855 tensor(0.5197) 16
856 tensor(0.4729) 17
857 tensor(0.5814) 13
858 tensor(0.4875) 17
859 tensor(0.5708) 14
860 tensor(0.4767) 17
861 tensor(0.5026) 16
862 tensor(0.4922) 17
863 tensor(0.5686) 14
864 tensor(0.5409) 15
865 tensor(0.5495) 14
866 tensor(0.5062) 17
867 tensor(0.4763) 18
868 tensor(0.5366) 15
869 tensor(0.5946) 13
870 tensor(0.5617) 15
871 tensor(0.4865) 17
872 tensor(0.4491) 18
873 tensor(0.5021) 17
874 tensor(0.5165) 15
875 tensor(0.4748) 18
876 tensor(0.5495) 15
877 tensor

In [16]:
class ClientUpdate(object):
    def __init__(self, dataset, batchSize, alpha, lamda, epochs, projection_list, projected_weights):
        self.train_loader = DataLoader(MyDataset(dataset["features"], dataset["label"]), batch_size=batchSize, shuffle=True)
        #self.learning_rate = learning_rate
        self.epochs = epochs
        self.batchSize = batchSize

    def train(self, model):
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, momentum=0.5)

        e_loss = []
        for epoch in range(1, self.epochs+1):
            train_loss = 0
            model.train()
            for i, (data, labels) in zip(range(1), self.train_loader):
                data, labels = data, labels
                optimizer.zero_grad() 
                output = model(data)  
                loss = criterion(output, labels)
                #loss += mu/2 * torch.norm(client_param.data - server_param.data)**2
                loss.backward()
                grads = grads_to_vector(model.parameters())
                #optimizer.step()
                train_loss += loss.item()*data.size(0)
                weights = parameters_to_vector(model.parameters())
                mat_vec_sum = torch.zeros_like(weights)
                for j in G.neighbors(model.user_id):
                    mat_vec_sum = torch.add(mat_vec_sum, torch.matmul(torch.transpose(projection_list[model.user_id][j], 0, 1), 
                                                         projected_weights[j][model.user_id] - projected_weights[model.user_id][j]))
                
                model_update = parameters_to_vector(model.parameters()) - alpha * (grads + lamda * mat_vec_sum)
                
            vector_to_parameters(parameters=model.parameters(), vec=model_update)
                

            train_loss = train_loss/self.batchSize#len(self.train_loader.dataset) 
            e_loss.append(train_loss)

        total_loss = e_loss#sum(e_loss)/len(e_loss)

        return model.state_dict(), total_loss

In [17]:
# Preparing projection matrices
models = [MLP_Net(input_size=datapoints[i]['input_size'], num_classes=2, user_id=i) for i in range(no_users)]
#temp = MLP_Net()
projection_list = []
projected_weights = []

def update_ProjWeight(projection_list, projected_weights, models, first_run=True):
    for i in range(no_users):
        neighbors_mat = []
        neighbors_weights = []
        for j in range(no_users):
            if j in G.neighbors(i):
                with torch.no_grad():
                    if first_run == True:
                        # Specify the dimensions of the rectangular matrix
                        row, column = d0, parameters_to_vector(models[i].parameters()).size()[0]

                        # Generate random values for the diagonal from a normal distribution
                        diag_values = 1.0 + 1.0 * torch.randn(min(row, column))

                        # Create a rectangular matrix with diagonal elements
                        mat = torch.diag(diag_values)

                        # If the matrix is larger than the diagonal vector, fill the remaining elements with zeros
                       
                        mat = torch.cat((mat, torch.zeros(row, column - row)), dim=1)

                        

                        # Append the matrix to the list
                        neighbors_mat.append(mat)
                        neighbors_weights.append(torch.matmul(mat, parameters_to_vector(models[i].parameters())))
                    else:
                        neighbors_weights.append(torch.matmul(projection_list[i][j], parameters_to_vector(models[i].parameters())))
            else:
                neighbors_mat.append(0)
                neighbors_weights.append(0)
        if first_run == True:
            projection_list.append(neighbors_mat)
        projected_weights.append(neighbors_weights)

update_ProjWeight(projection_list, projected_weights, models)



In [18]:
def testing(model, dataset, bs, criterion): 
    test_loss = 0
    correct = 0
    test_loader = DataLoader(MyDataset(dataset["test_features"], dataset["test_labels"]), batch_size=bs)
    l = len(test_loader)
    model.eval()
    for data, labels in test_loader:
        data, labels = data, labels
        output = model(data)
        loss = criterion(output, labels)
        test_loss += loss.item()*data.size(0)
        _, pred = torch.max(output, 1)
        _, predicted_true = torch.max(labels, 1)
        correct += pred.eq(predicted_true.data.view_as(pred)).sum().item()
    
    test_loss /= len(test_loader.dataset)
    correct /= len(test_loader.dataset)
    
    return test_loss, correct

In [None]:
#global_model = CNN_Net().cuda()
models = [MLP_Net(input_size=datapoints[i]['input_size'], num_classes=2, user_id=i) for i in range(no_users)]
dummy_models = [MLP_Net(input_size=datapoints[i]['input_size'], num_classes=2, user_id=i) for i in range(no_users)]

#model.load_state_dict(global_model.state_dict())

criterion = nn.CrossEntropyLoss()


train_loss = []
test_loss = []
test_accuracy = []
total_rel_error = []

in_cluster_proj_norm = []
out_cluster_proj_norm = []
in_cluster_proj_diff_norm = []
out_cluster_proj_diff_norm = []
task_loss = {'0':[],
                '1':[],
                '2':[]}
task_rel_error = {'0':[],
            '1':[],
            '2':[]}

for curr_round in tqdm(range(1, it+1)):
    w, local_loss = [], []

    
    for i in range(no_users):
        dummy_models[i].load_state_dict(models[i].state_dict())
        local_update = ClientUpdate(dataset=datapoints[i], batchSize=batch_size, alpha=alpha, lamda=lamda, epochs=1, projection_list=projection_list, projected_weights=projected_weights)
        weights, loss = local_update.train(dummy_models[i])
        w.append(weights)
        local_loss.append(loss)
        models[i].load_state_dict(w[i])
        
    
    
    # Update prjection matrix
    projected_weights = []
    update_ProjWeight(projection_list, projected_weights, models, first_run=False)
    
    #print(projection_list[0], projected_weights[0])
    
    for i in range(no_users):
        weights = parameters_to_vector(models[i].parameters())
        for j in G.neighbors(i):
            temp_mat = torch.outer(projected_weights[i][j] - projected_weights[j][i], weights).clone()
            projection_list[i][j] = torch.add(projection_list[i][j], -1 * eta * lamda * temp_mat)
    
    in_cluster_proj_norm_round = 0
    out_cluster_proj_norm_round = 0
    in_cluster_proj_diff_round = 0
    out_cluster_proj_diff_round = 0
    in_edges = 0
    out_edges = 0
    
    for i in range(no_users//3):
        for j in G.neighbors(i):
            if j < no_users//3:
                in_edges += 1
                in_cluster_proj_norm_round += torch.norm(projection_list[i][j]).detach().numpy()
                in_cluster_proj_diff_round += torch.norm(projected_weights[i][j] - projected_weights[j][i]).detach().numpy()
            else:
                out_edges += 1
                out_cluster_proj_norm_round += torch.norm(projection_list[i][j]).detach().numpy()
                out_cluster_proj_diff_round += torch.norm(projected_weights[i][j] - projected_weights[j][i]).detach().numpy()
    in_cluster_proj_norm.append(in_cluster_proj_norm_round / in_edges)
    out_cluster_proj_norm.append(out_cluster_proj_norm_round / out_edges)
    in_cluster_proj_diff_norm.append(in_cluster_proj_diff_round / in_edges)
    out_cluster_proj_diff_norm.append(out_cluster_proj_diff_round / out_edges)
            
                                         
                                              
    
        
        
        
    
    




          
            

    local_test_acc = []
    local_test_loss = []
    user_rel_error = 0
    per_task_loss = []
    per_task_rel_error = []
    
    for k in range(no_users):
      
        g_loss, acc = testing(models[i], datapoints[i], 50, criterion)
        local_test_loss.append(g_loss)
        user_rel_error += acc#rel_error(models[i])
        if (k + 1) % 10 == 0:
            task_loss[str(k // 10)].append(sum(per_task_loss) / 10)
            task_rel_error[str(k // 10)].append(sum(per_task_rel_error) / 10)
            per_task_loss = []
            per_task_rel_error = []
        per_task_loss.append(g_loss)
        per_task_rel_error.append(acc)#rel_error(models[i]))
    
    
        

    g_loss = sum(local_test_loss) / len(local_test_loss)
    total_rel_error.append(user_rel_error / no_users)
    
    

    test_loss.append(g_loss)
    #test_accuracy.append(g_accuracy)
    print("Training_loss %2.5f,   Accuracy %2.5f"% (test_loss[-1], total_rel_error[-1]))

  0%|          | 1/2000 [00:17<9:35:23, 17.27s/it]

Training_loss 0.70219,   Accuracy 0.47351


  0%|          | 2/2000 [00:32<8:57:50, 16.15s/it]

Training_loss 0.70101,   Accuracy 0.48139


  0%|          | 3/2000 [00:47<8:31:10, 15.36s/it]

Training_loss 0.70079,   Accuracy 0.48214


  0%|          | 4/2000 [01:00<8:06:08, 14.61s/it]

Training_loss 0.69918,   Accuracy 0.49186


  0%|          | 5/2000 [01:14<8:00:13, 14.44s/it]

Training_loss 0.69739,   Accuracy 0.50065


  0%|          | 6/2000 [01:28<7:53:08, 14.24s/it]

Training_loss 0.69646,   Accuracy 0.50339


  0%|          | 7/2000 [01:43<8:00:24, 14.46s/it]

Training_loss 0.69617,   Accuracy 0.50413


  0%|          | 8/2000 [02:00<8:27:35, 15.29s/it]

Training_loss 0.69541,   Accuracy 0.50892


  0%|          | 9/2000 [02:15<8:24:20, 15.20s/it]

Training_loss 0.69466,   Accuracy 0.51554


  0%|          | 10/2000 [02:29<8:11:58, 14.83s/it]

Training_loss 0.69340,   Accuracy 0.52071


  1%|          | 11/2000 [02:43<8:06:56, 14.69s/it]

Training_loss 0.69252,   Accuracy 0.52284


  1%|          | 12/2000 [02:58<8:02:29, 14.56s/it]

Training_loss 0.69298,   Accuracy 0.52084


  1%|          | 13/2000 [03:12<7:55:20, 14.35s/it]

Training_loss 0.69189,   Accuracy 0.53121


  1%|          | 14/2000 [03:25<7:44:50, 14.04s/it]

Training_loss 0.69049,   Accuracy 0.53996


  1%|          | 15/2000 [03:39<7:46:15, 14.09s/it]

Training_loss 0.68952,   Accuracy 0.55081


  1%|          | 16/2000 [03:55<8:02:41, 14.60s/it]

Training_loss 0.68811,   Accuracy 0.55543


  1%|          | 17/2000 [04:11<8:18:44, 15.09s/it]

Training_loss 0.68663,   Accuracy 0.56280


  1%|          | 18/2000 [04:32<9:15:27, 16.82s/it]

Training_loss 0.68623,   Accuracy 0.56377


  1%|          | 19/2000 [04:51<9:42:33, 17.64s/it]

Training_loss 0.68531,   Accuracy 0.56823


  1%|          | 20/2000 [05:08<9:34:56, 17.42s/it]

Training_loss 0.68418,   Accuracy 0.57359


  1%|          | 21/2000 [05:23<9:03:09, 16.47s/it]

Training_loss 0.68341,   Accuracy 0.57931


  1%|          | 22/2000 [05:37<8:38:09, 15.72s/it]

Training_loss 0.68215,   Accuracy 0.58719


  1%|          | 23/2000 [05:50<8:13:38, 14.98s/it]

Training_loss 0.68182,   Accuracy 0.58932


  1%|          | 24/2000 [06:06<8:26:59, 15.39s/it]

Training_loss 0.68010,   Accuracy 0.59772


  1%|▏         | 25/2000 [06:19<8:03:32, 14.69s/it]

Training_loss 0.67920,   Accuracy 0.60173


  1%|▏         | 26/2000 [06:33<7:55:43, 14.46s/it]

Training_loss 0.67876,   Accuracy 0.60328


  1%|▏         | 27/2000 [06:47<7:50:08, 14.30s/it]

Training_loss 0.67755,   Accuracy 0.60961


  1%|▏         | 28/2000 [07:01<7:44:11, 14.12s/it]

Training_loss 0.67710,   Accuracy 0.61203


  1%|▏         | 29/2000 [07:17<8:05:41, 14.79s/it]

Training_loss 0.67603,   Accuracy 0.61723


  2%|▏         | 30/2000 [07:31<7:52:33, 14.39s/it]

Training_loss 0.67535,   Accuracy 0.62088


  2%|▏         | 31/2000 [08:53<19:00:41, 34.76s/it]

Training_loss 0.67453,   Accuracy 0.62411


  2%|▏         | 32/2000 [10:47<32:01:03, 58.57s/it]

Training_loss 0.67392,   Accuracy 0.62789


  2%|▏         | 33/2000 [12:34<39:59:50, 73.20s/it]

Training_loss 0.67302,   Accuracy 0.63283


  2%|▏         | 34/2000 [14:21<45:30:59, 83.35s/it]

Training_loss 0.67235,   Accuracy 0.63529


  2%|▏         | 35/2000 [16:07<49:07:51, 90.01s/it]

Training_loss 0.67125,   Accuracy 0.64524


  2%|▏         | 36/2000 [17:54<51:50:30, 95.03s/it]

Training_loss 0.67059,   Accuracy 0.64866


  2%|▏         | 37/2000 [19:40<53:40:48, 98.45s/it]

Training_loss 0.66986,   Accuracy 0.65044


  2%|▏         | 38/2000 [21:27<55:04:58, 101.07s/it]

Training_loss 0.66852,   Accuracy 0.65638


  2%|▏         | 39/2000 [23:14<56:02:30, 102.88s/it]

Training_loss 0.66784,   Accuracy 0.65978


  2%|▏         | 40/2000 [25:04<57:02:31, 104.77s/it]

Training_loss 0.66698,   Accuracy 0.66611


  2%|▏         | 41/2000 [26:54<57:59:43, 106.58s/it]

Training_loss 0.66627,   Accuracy 0.66976


  2%|▏         | 42/2000 [28:44<58:25:25, 107.42s/it]

Training_loss 0.66529,   Accuracy 0.67522


  2%|▏         | 43/2000 [30:33<58:45:27, 108.09s/it]

Training_loss 0.66436,   Accuracy 0.67735


  2%|▏         | 44/2000 [32:22<58:50:28, 108.30s/it]

Training_loss 0.66305,   Accuracy 0.68184


  2%|▏         | 45/2000 [34:10<58:43:28, 108.14s/it]

Training_loss 0.66242,   Accuracy 0.68374


  2%|▏         | 46/2000 [35:56<58:16:43, 107.37s/it]

Training_loss 0.66089,   Accuracy 0.68811


  2%|▏         | 47/2000 [37:44<58:24:44, 107.67s/it]

Training_loss 0.66033,   Accuracy 0.68862


  2%|▏         | 48/2000 [39:31<58:13:07, 107.37s/it]

Training_loss 0.66005,   Accuracy 0.69095


  2%|▏         | 49/2000 [41:16<57:56:02, 106.90s/it]

Training_loss 0.65997,   Accuracy 0.69024


  2%|▎         | 50/2000 [43:01<57:29:53, 106.15s/it]

Training_loss 0.65965,   Accuracy 0.69134


  3%|▎         | 51/2000 [44:47<57:27:50, 106.14s/it]

Training_loss 0.65852,   Accuracy 0.70280


  3%|▎         | 52/2000 [46:38<58:12:13, 107.56s/it]

Training_loss 0.65779,   Accuracy 0.70397


  3%|▎         | 53/2000 [48:36<59:57:00, 110.85s/it]

Training_loss 0.65676,   Accuracy 0.71036


  3%|▎         | 54/2000 [50:35<61:09:31, 113.14s/it]

Training_loss 0.65614,   Accuracy 0.71417


  3%|▎         | 55/2000 [52:23<60:19:59, 111.67s/it]

Training_loss 0.65583,   Accuracy 0.71627


  3%|▎         | 56/2000 [54:07<59:03:19, 109.36s/it]

Training_loss 0.65510,   Accuracy 0.72067


  3%|▎         | 57/2000 [55:55<58:46:23, 108.90s/it]

Training_loss 0.65475,   Accuracy 0.72154


  3%|▎         | 58/2000 [57:42<58:32:32, 108.52s/it]

Training_loss 0.65420,   Accuracy 0.72610


  3%|▎         | 59/2000 [59:25<57:34:44, 106.79s/it]

Training_loss 0.65369,   Accuracy 0.72719


  3%|▎         | 60/2000 [1:01:19<58:42:15, 108.94s/it]

Training_loss 0.65255,   Accuracy 0.73007


  3%|▎         | 61/2000 [1:03:12<59:22:03, 110.22s/it]

Training_loss 0.65148,   Accuracy 0.73343


  3%|▎         | 62/2000 [1:05:15<61:16:48, 113.83s/it]

Training_loss 0.65062,   Accuracy 0.73527


  3%|▎         | 63/2000 [1:07:16<62:28:34, 116.12s/it]

Training_loss 0.64975,   Accuracy 0.73650


  3%|▎         | 64/2000 [1:09:16<63:02:59, 117.24s/it]

Training_loss 0.64945,   Accuracy 0.73711


  3%|▎         | 65/2000 [1:11:05<61:44:16, 114.86s/it]

Training_loss 0.64837,   Accuracy 0.74002


  3%|▎         | 66/2000 [1:12:52<60:27:48, 112.55s/it]

Training_loss 0.64761,   Accuracy 0.74073


  3%|▎         | 67/2000 [1:14:41<59:48:31, 111.39s/it]

Training_loss 0.64748,   Accuracy 0.74131


  3%|▎         | 68/2000 [1:16:28<59:05:52, 110.12s/it]

Training_loss 0.64666,   Accuracy 0.74221


  3%|▎         | 69/2000 [1:18:13<58:11:07, 108.48s/it]

Training_loss 0.64625,   Accuracy 0.74315


  4%|▎         | 70/2000 [1:19:58<57:40:04, 107.57s/it]

Training_loss 0.64520,   Accuracy 0.74493


  4%|▎         | 71/2000 [1:21:48<58:02:34, 108.32s/it]

Training_loss 0.64418,   Accuracy 0.74890


  4%|▎         | 72/2000 [1:23:34<57:29:49, 107.36s/it]

Training_loss 0.64346,   Accuracy 0.75055


  4%|▎         | 73/2000 [1:25:19<57:09:25, 106.78s/it]

Training_loss 0.64266,   Accuracy 0.75184


  4%|▎         | 74/2000 [1:27:12<58:11:33, 108.77s/it]

Training_loss 0.64183,   Accuracy 0.75239


  4%|▍         | 75/2000 [1:28:57<57:26:36, 107.43s/it]

Training_loss 0.64215,   Accuracy 0.75242


  4%|▍         | 76/2000 [1:30:43<57:10:14, 106.97s/it]

Training_loss 0.64210,   Accuracy 0.75236


  4%|▍         | 77/2000 [1:32:27<56:45:27, 106.25s/it]

Training_loss 0.64145,   Accuracy 0.75410


  4%|▍         | 78/2000 [1:34:16<57:06:26, 106.96s/it]

Training_loss 0.64086,   Accuracy 0.75559


  4%|▍         | 79/2000 [1:36:00<56:41:40, 106.25s/it]

Training_loss 0.64015,   Accuracy 0.75662


  4%|▍         | 80/2000 [1:37:45<56:25:18, 105.79s/it]

Training_loss 0.63949,   Accuracy 0.75821


  4%|▍         | 81/2000 [1:39:30<56:18:22, 105.63s/it]

Training_loss 0.63909,   Accuracy 0.75850


  4%|▍         | 82/2000 [1:41:17<56:29:18, 106.03s/it]

Training_loss 0.63970,   Accuracy 0.75901


  4%|▍         | 83/2000 [1:43:02<56:11:55, 105.54s/it]

Training_loss 0.63912,   Accuracy 0.75917


  4%|▍         | 84/2000 [1:44:45<55:47:42, 104.83s/it]

Training_loss 0.63906,   Accuracy 0.75979


  4%|▍         | 85/2000 [1:46:37<56:56:49, 107.05s/it]

Training_loss 0.63885,   Accuracy 0.75995


  4%|▍         | 86/2000 [1:48:21<56:20:05, 105.96s/it]

Training_loss 0.63854,   Accuracy 0.76001


  4%|▍         | 87/2000 [1:50:06<56:13:19, 105.80s/it]

Training_loss 0.63739,   Accuracy 0.76208


  4%|▍         | 88/2000 [1:51:51<56:03:11, 105.54s/it]

Training_loss 0.63684,   Accuracy 0.76234


  4%|▍         | 89/2000 [1:53:34<55:40:17, 104.88s/it]

Training_loss 0.63660,   Accuracy 0.76289


  4%|▍         | 90/2000 [1:55:20<55:45:51, 105.11s/it]

Training_loss 0.63562,   Accuracy 0.76308


  5%|▍         | 91/2000 [1:57:06<55:56:11, 105.49s/it]

Training_loss 0.63469,   Accuracy 0.76354


  5%|▍         | 92/2000 [1:59:01<57:27:37, 108.42s/it]

Training_loss 0.63461,   Accuracy 0.76412


  5%|▍         | 93/2000 [2:01:07<60:06:57, 113.49s/it]

Training_loss 0.63457,   Accuracy 0.76431


  5%|▍         | 94/2000 [2:03:16<62:32:18, 118.12s/it]

Training_loss 0.63418,   Accuracy 0.76476


  5%|▍         | 95/2000 [2:05:12<62:13:42, 117.60s/it]

Training_loss 0.63340,   Accuracy 0.76492


  5%|▍         | 96/2000 [2:07:15<63:00:18, 119.13s/it]

Training_loss 0.63297,   Accuracy 0.76522


  5%|▍         | 97/2000 [2:09:02<61:07:40, 115.64s/it]

Training_loss 0.63183,   Accuracy 0.76438


  5%|▍         | 98/2000 [2:10:52<60:06:53, 113.78s/it]

Training_loss 0.63153,   Accuracy 0.76389


  5%|▍         | 99/2000 [2:12:38<58:54:24, 111.55s/it]

Training_loss 0.63128,   Accuracy 0.76483


  5%|▌         | 100/2000 [2:14:29<58:51:03, 111.51s/it]

Training_loss 0.63121,   Accuracy 0.76573


  5%|▌         | 101/2000 [2:16:13<57:35:23, 109.18s/it]

Training_loss 0.63107,   Accuracy 0.76609


  5%|▌         | 102/2000 [2:17:59<57:00:29, 108.13s/it]

Training_loss 0.63066,   Accuracy 0.76628


  5%|▌         | 103/2000 [2:19:44<56:29:09, 107.20s/it]

Training_loss 0.63039,   Accuracy 0.76680


  5%|▌         | 104/2000 [2:21:27<55:52:45, 106.10s/it]

Training_loss 0.63040,   Accuracy 0.76770


  5%|▌         | 105/2000 [2:23:18<56:31:11, 107.37s/it]

Training_loss 0.63041,   Accuracy 0.76793


  5%|▌         | 106/2000 [2:25:00<55:44:54, 105.96s/it]

Training_loss 0.62921,   Accuracy 0.76822


  5%|▌         | 107/2000 [2:26:50<56:12:50, 106.90s/it]

Training_loss 0.62868,   Accuracy 0.76864


  5%|▌         | 108/2000 [2:28:34<55:45:42, 106.10s/it]

Training_loss 0.62853,   Accuracy 0.76796


  5%|▌         | 109/2000 [2:30:18<55:28:02, 105.60s/it]

Training_loss 0.62814,   Accuracy 0.76848


  6%|▌         | 110/2000 [2:32:05<55:34:14, 105.85s/it]

Training_loss 0.62789,   Accuracy 0.76841


  6%|▌         | 111/2000 [2:33:49<55:15:36, 105.31s/it]

Training_loss 0.62744,   Accuracy 0.76854


  6%|▌         | 112/2000 [2:35:33<55:04:41, 105.02s/it]

Training_loss 0.62676,   Accuracy 0.76922


  6%|▌         | 113/2000 [2:37:18<54:58:20, 104.88s/it]

Training_loss 0.62652,   Accuracy 0.76958


  6%|▌         | 114/2000 [2:39:04<55:09:02, 105.27s/it]

Training_loss 0.62575,   Accuracy 0.76980


  6%|▌         | 115/2000 [2:40:52<55:33:21, 106.10s/it]

Training_loss 0.62548,   Accuracy 0.77006


  6%|▌         | 116/2000 [2:42:38<55:34:10, 106.18s/it]

Training_loss 0.62492,   Accuracy 0.76967


  6%|▌         | 117/2000 [2:44:26<55:49:53, 106.74s/it]

Training_loss 0.62433,   Accuracy 0.76980


  6%|▌         | 118/2000 [2:46:11<55:33:00, 106.26s/it]

Training_loss 0.62374,   Accuracy 0.77003


  6%|▌         | 119/2000 [2:47:59<55:41:16, 106.58s/it]

Training_loss 0.62345,   Accuracy 0.76967


  6%|▌         | 120/2000 [2:49:47<55:57:18, 107.15s/it]

Training_loss 0.62307,   Accuracy 0.76922


  6%|▌         | 121/2000 [2:51:32<55:33:51, 106.46s/it]

Training_loss 0.62241,   Accuracy 0.76919


  6%|▌         | 122/2000 [2:53:17<55:18:39, 106.03s/it]

Training_loss 0.62199,   Accuracy 0.76935


  6%|▌         | 123/2000 [2:55:01<54:55:24, 105.34s/it]

Training_loss 0.62168,   Accuracy 0.76945


  6%|▌         | 124/2000 [2:56:49<55:17:21, 106.10s/it]

Training_loss 0.62131,   Accuracy 0.76990


  6%|▋         | 125/2000 [2:58:35<55:15:41, 106.10s/it]

Training_loss 0.62064,   Accuracy 0.76961


  6%|▋         | 126/2000 [3:00:28<56:18:40, 108.18s/it]

Training_loss 0.62051,   Accuracy 0.76987


  6%|▋         | 127/2000 [3:02:19<56:42:25, 108.99s/it]

Training_loss 0.62007,   Accuracy 0.76971


  6%|▋         | 128/2000 [3:04:15<57:48:06, 111.16s/it]

Training_loss 0.61928,   Accuracy 0.77003


  6%|▋         | 129/2000 [3:06:14<58:59:48, 113.52s/it]

Training_loss 0.61915,   Accuracy 0.77016


  6%|▋         | 130/2000 [3:08:10<59:17:46, 114.15s/it]

Training_loss 0.61870,   Accuracy 0.76993


  7%|▋         | 131/2000 [3:09:59<58:31:17, 112.72s/it]

Training_loss 0.61782,   Accuracy 0.76964


  7%|▋         | 132/2000 [3:11:46<57:36:00, 111.01s/it]

Training_loss 0.61727,   Accuracy 0.76967


  7%|▋         | 133/2000 [3:13:33<56:55:46, 109.77s/it]

Training_loss 0.61727,   Accuracy 0.76980


  7%|▋         | 134/2000 [3:15:16<55:49:20, 107.70s/it]

Training_loss 0.61715,   Accuracy 0.76996


  7%|▋         | 135/2000 [3:17:01<55:28:13, 107.07s/it]

Training_loss 0.61715,   Accuracy 0.77019


  7%|▋         | 136/2000 [3:18:46<55:08:24, 106.49s/it]

Training_loss 0.61648,   Accuracy 0.77009


  7%|▋         | 137/2000 [3:20:32<54:55:58, 106.15s/it]

Training_loss 0.61578,   Accuracy 0.76987


  7%|▋         | 138/2000 [3:22:19<54:59:28, 106.32s/it]

Training_loss 0.61489,   Accuracy 0.76954


  7%|▋         | 139/2000 [3:24:06<55:10:30, 106.73s/it]

Training_loss 0.61450,   Accuracy 0.76954


  7%|▋         | 140/2000 [3:25:54<55:18:55, 107.06s/it]

Training_loss 0.61421,   Accuracy 0.76954


  7%|▋         | 141/2000 [3:27:47<56:14:25, 108.91s/it]

Training_loss 0.61393,   Accuracy 0.76958


  7%|▋         | 142/2000 [3:29:35<56:04:18, 108.64s/it]

Training_loss 0.61341,   Accuracy 0.76977


  7%|▋         | 143/2000 [3:31:19<55:19:09, 107.24s/it]

Training_loss 0.61290,   Accuracy 0.76974


  7%|▋         | 144/2000 [3:33:03<54:44:50, 106.19s/it]

Training_loss 0.61295,   Accuracy 0.76977


  7%|▋         | 145/2000 [3:34:48<54:27:50, 105.70s/it]

Training_loss 0.61232,   Accuracy 0.76971


  7%|▋         | 146/2000 [3:36:32<54:17:18, 105.41s/it]

Training_loss 0.61173,   Accuracy 0.76977


  7%|▋         | 147/2000 [3:38:21<54:44:44, 106.36s/it]

Training_loss 0.61140,   Accuracy 0.77000


  7%|▋         | 148/2000 [3:40:09<54:56:34, 106.80s/it]

Training_loss 0.61107,   Accuracy 0.76983


  7%|▋         | 149/2000 [3:41:55<54:52:16, 106.72s/it]

Training_loss 0.61150,   Accuracy 0.77013


  8%|▊         | 150/2000 [3:43:40<54:33:15, 106.16s/it]

Training_loss 0.61116,   Accuracy 0.77003


  8%|▊         | 151/2000 [3:45:25<54:22:32, 105.87s/it]

Training_loss 0.61050,   Accuracy 0.76996


  8%|▊         | 152/2000 [3:47:09<54:02:12, 105.27s/it]

Training_loss 0.61004,   Accuracy 0.76980


  8%|▊         | 153/2000 [3:48:55<54:02:16, 105.33s/it]

Training_loss 0.61007,   Accuracy 0.76983


  8%|▊         | 154/2000 [3:50:41<54:14:12, 105.77s/it]

Training_loss 0.60987,   Accuracy 0.76987


  8%|▊         | 155/2000 [3:52:27<54:15:19, 105.86s/it]

Training_loss 0.60925,   Accuracy 0.76990


  8%|▊         | 156/2000 [3:54:12<54:01:56, 105.49s/it]

Training_loss 0.60973,   Accuracy 0.77003


  8%|▊         | 157/2000 [3:55:57<53:50:30, 105.17s/it]

Training_loss 0.60952,   Accuracy 0.77003


  8%|▊         | 158/2000 [3:57:42<53:52:14, 105.28s/it]

Training_loss 0.60923,   Accuracy 0.77013


  8%|▊         | 159/2000 [3:59:26<53:40:37, 104.96s/it]

Training_loss 0.60903,   Accuracy 0.77032


  8%|▊         | 160/2000 [4:01:20<54:56:06, 107.48s/it]

Training_loss 0.60846,   Accuracy 0.77006


  8%|▊         | 161/2000 [4:03:15<56:08:04, 109.89s/it]

Training_loss 0.60816,   Accuracy 0.77009


  8%|▊         | 162/2000 [4:05:11<57:03:45, 111.77s/it]

Training_loss 0.60828,   Accuracy 0.77006


  8%|▊         | 163/2000 [4:07:17<59:12:43, 116.04s/it]

Training_loss 0.60801,   Accuracy 0.77009


  8%|▊         | 164/2000 [4:09:15<59:23:00, 116.44s/it]

Training_loss 0.60750,   Accuracy 0.77003


  8%|▊         | 165/2000 [4:11:05<58:27:16, 114.68s/it]

Training_loss 0.60749,   Accuracy 0.77009


  8%|▊         | 166/2000 [4:12:55<57:44:33, 113.34s/it]

Training_loss 0.60756,   Accuracy 0.77019


  8%|▊         | 167/2000 [4:14:44<56:59:00, 111.92s/it]

Training_loss 0.60773,   Accuracy 0.77061


  8%|▊         | 168/2000 [4:16:29<55:55:18, 109.89s/it]

Training_loss 0.60741,   Accuracy 0.77045


  8%|▊         | 169/2000 [4:18:15<55:12:21, 108.54s/it]

Training_loss 0.60715,   Accuracy 0.77042


  8%|▊         | 170/2000 [4:20:00<54:42:04, 107.61s/it]

Training_loss 0.60712,   Accuracy 0.77077


  9%|▊         | 171/2000 [4:21:44<54:09:09, 106.59s/it]

Training_loss 0.60725,   Accuracy 0.77093


  9%|▊         | 172/2000 [4:23:33<54:26:43, 107.22s/it]

Training_loss 0.60733,   Accuracy 0.77100


  9%|▊         | 173/2000 [4:25:17<53:54:49, 106.23s/it]

Training_loss 0.60729,   Accuracy 0.77100


  9%|▊         | 174/2000 [4:27:06<54:20:45, 107.14s/it]

Training_loss 0.60708,   Accuracy 0.77097


  9%|▉         | 175/2000 [4:28:52<54:07:38, 106.77s/it]

Training_loss 0.60684,   Accuracy 0.77090


  9%|▉         | 176/2000 [4:30:42<54:32:07, 107.64s/it]

Training_loss 0.60622,   Accuracy 0.77087


  9%|▉         | 177/2000 [4:32:32<54:53:25, 108.40s/it]

Training_loss 0.60631,   Accuracy 0.77106


  9%|▉         | 178/2000 [4:34:22<55:06:51, 108.90s/it]

Training_loss 0.60636,   Accuracy 0.77109


  9%|▉         | 179/2000 [4:36:16<55:49:44, 110.37s/it]

Training_loss 0.60557,   Accuracy 0.77090


  9%|▉         | 180/2000 [4:38:08<56:01:18, 110.81s/it]

Training_loss 0.60608,   Accuracy 0.77100


  9%|▉         | 181/2000 [4:39:58<55:53:20, 110.61s/it]

Training_loss 0.60608,   Accuracy 0.77122


  9%|▉         | 182/2000 [4:41:44<55:11:29, 109.29s/it]

Training_loss 0.60581,   Accuracy 0.77119


  9%|▉         | 183/2000 [4:43:29<54:27:52, 107.91s/it]

Training_loss 0.60552,   Accuracy 0.77119


  9%|▉         | 184/2000 [4:45:11<53:32:40, 106.15s/it]

Training_loss 0.60541,   Accuracy 0.77119


  9%|▉         | 185/2000 [4:46:58<53:41:13, 106.49s/it]

Training_loss 0.60563,   Accuracy 0.77158


  9%|▉         | 186/2000 [4:48:45<53:42:21, 106.58s/it]

Training_loss 0.60508,   Accuracy 0.77145


  9%|▉         | 187/2000 [4:50:30<53:32:10, 106.30s/it]

Training_loss 0.60480,   Accuracy 0.77142


  9%|▉         | 188/2000 [4:52:17<53:28:48, 106.25s/it]

Training_loss 0.60480,   Accuracy 0.77155


  9%|▉         | 189/2000 [4:54:01<53:14:01, 105.82s/it]

Training_loss 0.60457,   Accuracy 0.77161


 10%|▉         | 190/2000 [4:55:45<52:50:48, 105.11s/it]

Training_loss 0.60447,   Accuracy 0.77168


 10%|▉         | 191/2000 [4:57:32<53:08:27, 105.75s/it]

Training_loss 0.60418,   Accuracy 0.77171


 10%|▉         | 192/2000 [4:59:16<52:52:00, 105.27s/it]

Training_loss 0.60394,   Accuracy 0.77181


 10%|▉         | 193/2000 [5:01:03<53:02:51, 105.68s/it]

Training_loss 0.60351,   Accuracy 0.77158


 10%|▉         | 194/2000 [5:03:04<55:25:06, 110.47s/it]

Training_loss 0.60339,   Accuracy 0.77168


 10%|▉         | 195/2000 [5:05:05<56:55:35, 113.54s/it]

Training_loss 0.60300,   Accuracy 0.77158


 10%|▉         | 196/2000 [5:07:06<57:59:12, 115.72s/it]

Training_loss 0.60260,   Accuracy 0.77145


 10%|▉         | 197/2000 [5:08:52<56:30:40, 112.83s/it]

Training_loss 0.60247,   Accuracy 0.77139


 10%|▉         | 198/2000 [5:10:38<55:29:17, 110.85s/it]

Training_loss 0.60223,   Accuracy 0.77139


 10%|▉         | 199/2000 [5:12:26<54:54:24, 109.75s/it]

Training_loss 0.60237,   Accuracy 0.77158


 10%|█         | 200/2000 [5:14:12<54:22:02, 108.73s/it]

Training_loss 0.60229,   Accuracy 0.77155


 10%|█         | 201/2000 [5:15:57<53:52:10, 107.80s/it]

Training_loss 0.60219,   Accuracy 0.77155


 10%|█         | 202/2000 [5:17:41<53:13:26, 106.57s/it]

Training_loss 0.60224,   Accuracy 0.77171


 10%|█         | 203/2000 [5:19:30<53:34:26, 107.33s/it]

Training_loss 0.60215,   Accuracy 0.77177


 10%|█         | 204/2000 [5:21:15<53:12:17, 106.65s/it]

Training_loss 0.60187,   Accuracy 0.77174


 10%|█         | 205/2000 [5:23:02<53:12:11, 106.70s/it]

Training_loss 0.60136,   Accuracy 0.77161


 10%|█         | 206/2000 [5:24:45<52:39:01, 105.65s/it]

Training_loss 0.60137,   Accuracy 0.77164


 10%|█         | 207/2000 [5:26:38<53:39:41, 107.74s/it]

Training_loss 0.60096,   Accuracy 0.77164


 10%|█         | 208/2000 [5:28:25<53:27:09, 107.38s/it]

Training_loss 0.60117,   Accuracy 0.77174


 10%|█         | 209/2000 [5:30:10<53:11:56, 106.93s/it]

Training_loss 0.60090,   Accuracy 0.77164


 10%|█         | 210/2000 [5:31:58<53:12:43, 107.02s/it]

Training_loss 0.60059,   Accuracy 0.77174


 11%|█         | 211/2000 [5:33:43<52:59:35, 106.64s/it]

Training_loss 0.60039,   Accuracy 0.77177


 11%|█         | 212/2000 [5:35:27<52:32:09, 105.78s/it]

Training_loss 0.59999,   Accuracy 0.77171


 11%|█         | 213/2000 [5:37:13<52:30:16, 105.77s/it]

Training_loss 0.60026,   Accuracy 0.77177


 11%|█         | 214/2000 [5:38:59<52:34:37, 105.98s/it]

Training_loss 0.59949,   Accuracy 0.77148


 11%|█         | 215/2000 [5:40:47<52:50:35, 106.57s/it]

Training_loss 0.59921,   Accuracy 0.77148


 11%|█         | 216/2000 [5:42:34<52:53:50, 106.74s/it]

Training_loss 0.59896,   Accuracy 0.77161


 11%|█         | 217/2000 [5:44:26<53:38:09, 108.29s/it]

Training_loss 0.59899,   Accuracy 0.77161


 11%|█         | 218/2000 [5:46:15<53:40:06, 108.42s/it]

Training_loss 0.59841,   Accuracy 0.77148


 11%|█         | 219/2000 [5:48:00<53:10:11, 107.47s/it]

Training_loss 0.59835,   Accuracy 0.77164


 11%|█         | 220/2000 [5:49:49<53:16:51, 107.76s/it]

Training_loss 0.59794,   Accuracy 0.77151


 11%|█         | 221/2000 [5:51:32<52:35:48, 106.44s/it]

Training_loss 0.59762,   Accuracy 0.77148


 11%|█         | 222/2000 [5:53:18<52:26:01, 106.17s/it]

Training_loss 0.59759,   Accuracy 0.77148


 11%|█         | 223/2000 [5:55:01<51:55:46, 105.20s/it]

Training_loss 0.59742,   Accuracy 0.77142


 11%|█         | 224/2000 [5:56:48<52:10:35, 105.76s/it]

Training_loss 0.59709,   Accuracy 0.77161


 11%|█▏        | 225/2000 [5:58:34<52:17:51, 106.07s/it]

Training_loss 0.59679,   Accuracy 0.77116


 11%|█▏        | 226/2000 [6:00:22<52:32:01, 106.61s/it]

Training_loss 0.59630,   Accuracy 0.77106


 11%|█▏        | 227/2000 [6:02:13<53:05:30, 107.80s/it]

Training_loss 0.59621,   Accuracy 0.77106


 11%|█▏        | 228/2000 [6:04:16<55:19:37, 112.40s/it]

Training_loss 0.59586,   Accuracy 0.77074


 11%|█▏        | 229/2000 [6:06:27<58:05:17, 118.08s/it]

Training_loss 0.59597,   Accuracy 0.77132


 12%|█▏        | 230/2000 [6:08:31<58:54:42, 119.82s/it]

Training_loss 0.59565,   Accuracy 0.77135


 12%|█▏        | 231/2000 [6:10:28<58:29:11, 119.02s/it]

Training_loss 0.59531,   Accuracy 0.77074


 12%|█▏        | 232/2000 [6:12:19<57:07:59, 116.33s/it]

Training_loss 0.59485,   Accuracy 0.77042


 12%|█▏        | 233/2000 [6:14:06<55:46:28, 113.63s/it]

Training_loss 0.59475,   Accuracy 0.77067


 12%|█▏        | 234/2000 [6:15:51<54:27:12, 111.00s/it]

Training_loss 0.59456,   Accuracy 0.77025


 12%|█▏        | 235/2000 [6:17:41<54:14:51, 110.65s/it]

Training_loss 0.59463,   Accuracy 0.77097


 12%|█▏        | 236/2000 [6:19:26<53:24:25, 108.99s/it]

Training_loss 0.59459,   Accuracy 0.77103


 12%|█▏        | 237/2000 [6:21:11<52:51:54, 107.95s/it]

Training_loss 0.59448,   Accuracy 0.77100


 12%|█▏        | 238/2000 [6:22:59<52:46:12, 107.82s/it]

Training_loss 0.59433,   Accuracy 0.77097


 12%|█▏        | 239/2000 [6:24:43<52:17:37, 106.90s/it]

Training_loss 0.59387,   Accuracy 0.77009


 12%|█▏        | 240/2000 [6:26:31<52:25:28, 107.23s/it]

Training_loss 0.59363,   Accuracy 0.77006


 12%|█▏        | 241/2000 [6:28:17<52:08:08, 106.70s/it]

Training_loss 0.59362,   Accuracy 0.77016


 12%|█▏        | 242/2000 [6:30:04<52:10:15, 106.83s/it]

Training_loss 0.59325,   Accuracy 0.76996


 12%|█▏        | 243/2000 [6:31:50<52:04:50, 106.71s/it]

Training_loss 0.59325,   Accuracy 0.77006


 12%|█▏        | 244/2000 [6:33:38<52:12:48, 107.04s/it]

Training_loss 0.59304,   Accuracy 0.77003


 12%|█▏        | 245/2000 [6:35:24<51:58:06, 106.60s/it]

Training_loss 0.59306,   Accuracy 0.77032


 12%|█▏        | 246/2000 [6:37:09<51:45:01, 106.22s/it]

Training_loss 0.59271,   Accuracy 0.77035


 12%|█▏        | 247/2000 [6:38:56<51:49:22, 106.42s/it]

Training_loss 0.59286,   Accuracy 0.77071


 12%|█▏        | 248/2000 [6:40:50<52:50:02, 108.56s/it]

Training_loss 0.59245,   Accuracy 0.77067


 12%|█▏        | 249/2000 [6:42:36<52:28:29, 107.89s/it]

Training_loss 0.59257,   Accuracy 0.77090


 12%|█▎        | 250/2000 [6:44:21<51:57:20, 106.88s/it]

Training_loss 0.59227,   Accuracy 0.77090


 13%|█▎        | 251/2000 [6:46:10<52:19:48, 107.71s/it]

Training_loss 0.59195,   Accuracy 0.77071


 13%|█▎        | 252/2000 [6:47:56<52:04:34, 107.25s/it]

Training_loss 0.59149,   Accuracy 0.77038


 13%|█▎        | 253/2000 [6:49:43<51:59:11, 107.13s/it]

Training_loss 0.59111,   Accuracy 0.77035


 13%|█▎        | 254/2000 [6:51:27<51:27:24, 106.10s/it]

Training_loss 0.59102,   Accuracy 0.77019


 13%|█▎        | 255/2000 [6:53:13<51:25:42, 106.10s/it]

Training_loss 0.59089,   Accuracy 0.77009


 13%|█▎        | 256/2000 [6:54:56<50:59:39, 105.26s/it]

Training_loss 0.59041,   Accuracy 0.77000


 13%|█▎        | 257/2000 [6:56:41<50:56:04, 105.20s/it]

Training_loss 0.59038,   Accuracy 0.77009


 13%|█▎        | 258/2000 [6:58:28<51:10:13, 105.75s/it]

Training_loss 0.59048,   Accuracy 0.77032


 13%|█▎        | 259/2000 [7:00:18<51:39:24, 106.81s/it]

Training_loss 0.59040,   Accuracy 0.77035


 13%|█▎        | 260/2000 [7:02:10<52:27:04, 108.52s/it]

Training_loss 0.59013,   Accuracy 0.77032


 13%|█▎        | 261/2000 [7:04:11<54:11:46, 112.19s/it]

Training_loss 0.59014,   Accuracy 0.77042


 13%|█▎        | 262/2000 [7:06:15<55:49:53, 115.65s/it]

Training_loss 0.59016,   Accuracy 0.77064


 13%|█▎        | 263/2000 [7:08:19<57:00:06, 118.14s/it]

Training_loss 0.59000,   Accuracy 0.77051


 13%|█▎        | 264/2000 [7:10:10<56:00:01, 116.13s/it]

Training_loss 0.58973,   Accuracy 0.77038


 13%|█▎        | 265/2000 [7:11:57<54:41:00, 113.46s/it]

Training_loss 0.58953,   Accuracy 0.77019


 13%|█▎        | 266/2000 [7:13:47<54:03:18, 112.23s/it]

Training_loss 0.58911,   Accuracy 0.77003


 13%|█▎        | 267/2000 [7:15:32<53:04:53, 110.27s/it]

Training_loss 0.58874,   Accuracy 0.76861


 13%|█▎        | 268/2000 [7:17:17<52:18:54, 108.74s/it]

Training_loss 0.58842,   Accuracy 0.76848


 13%|█▎        | 269/2000 [7:19:02<51:42:30, 107.54s/it]

Training_loss 0.58815,   Accuracy 0.76848


 14%|█▎        | 270/2000 [7:20:48<51:27:22, 107.08s/it]

Training_loss 0.58790,   Accuracy 0.76861


 14%|█▎        | 271/2000 [7:22:34<51:14:37, 106.70s/it]

Training_loss 0.58807,   Accuracy 0.76861


 14%|█▎        | 272/2000 [7:24:18<50:51:54, 105.97s/it]

Training_loss 0.58797,   Accuracy 0.76861


 14%|█▎        | 273/2000 [7:26:04<50:51:58, 106.03s/it]

Training_loss 0.58793,   Accuracy 0.76861


 14%|█▎        | 274/2000 [7:27:50<50:44:02, 105.82s/it]

Training_loss 0.58791,   Accuracy 0.76870


 14%|█▍        | 275/2000 [7:29:36<50:42:30, 105.83s/it]

Training_loss 0.58791,   Accuracy 0.76870


 14%|█▍        | 276/2000 [7:31:25<51:08:50, 106.80s/it]

Training_loss 0.58751,   Accuracy 0.76877


 14%|█▍        | 277/2000 [7:33:09<50:43:14, 105.97s/it]

Training_loss 0.58730,   Accuracy 0.76857


 14%|█▍        | 278/2000 [7:34:54<50:31:28, 105.63s/it]

Training_loss 0.58684,   Accuracy 0.76857


 14%|█▍        | 279/2000 [7:36:38<50:20:34, 105.31s/it]

Training_loss 0.58645,   Accuracy 0.76761


 14%|█▍        | 280/2000 [7:38:23<50:13:17, 105.11s/it]

Training_loss 0.58644,   Accuracy 0.76857


 14%|█▍        | 281/2000 [7:40:07<50:06:51, 104.95s/it]

Training_loss 0.58644,   Accuracy 0.76857


 14%|█▍        | 282/2000 [7:41:55<50:30:53, 105.85s/it]

Training_loss 0.58647,   Accuracy 0.76857


 14%|█▍        | 283/2000 [7:43:39<50:12:27, 105.27s/it]

Training_loss 0.58618,   Accuracy 0.76854


 14%|█▍        | 284/2000 [7:45:22<49:51:00, 104.58s/it]

Training_loss 0.58620,   Accuracy 0.76880


 14%|█▍        | 285/2000 [7:47:07<49:51:39, 104.66s/it]

Training_loss 0.58592,   Accuracy 0.76857


 14%|█▍        | 286/2000 [7:48:53<50:01:11, 105.06s/it]

Training_loss 0.58563,   Accuracy 0.76854


 14%|█▍        | 287/2000 [7:50:46<51:08:01, 107.46s/it]

Training_loss 0.58516,   Accuracy 0.76793


 14%|█▍        | 288/2000 [7:52:33<50:58:15, 107.18s/it]

Training_loss 0.58503,   Accuracy 0.76764


 14%|█▍        | 289/2000 [7:54:20<51:01:03, 107.34s/it]

Training_loss 0.58517,   Accuracy 0.76819


 14%|█▍        | 290/2000 [7:56:06<50:45:56, 106.88s/it]

Training_loss 0.58488,   Accuracy 0.76773


 15%|█▍        | 291/2000 [7:57:59<51:31:38, 108.54s/it]

Training_loss 0.58499,   Accuracy 0.76815


 15%|█▍        | 292/2000 [7:59:58<53:03:34, 111.84s/it]

Training_loss 0.58474,   Accuracy 0.76819


 15%|█▍        | 293/2000 [8:01:58<54:06:47, 114.12s/it]

Training_loss 0.58443,   Accuracy 0.76767


 15%|█▍        | 294/2000 [8:03:46<53:18:55, 112.51s/it]

Training_loss 0.58445,   Accuracy 0.76899


 15%|█▍        | 295/2000 [8:05:29<51:50:26, 109.46s/it]

Training_loss 0.58451,   Accuracy 0.76899


 15%|█▍        | 296/2000 [8:07:17<51:42:00, 109.23s/it]

Training_loss 0.58461,   Accuracy 0.76909


 15%|█▍        | 297/2000 [8:09:02<50:58:01, 107.74s/it]

Training_loss 0.58470,   Accuracy 0.76890


 15%|█▍        | 298/2000 [8:10:47<50:40:04, 107.17s/it]

Training_loss 0.58426,   Accuracy 0.76909


 15%|█▍        | 299/2000 [8:12:34<50:32:59, 106.98s/it]

Training_loss 0.58418,   Accuracy 0.76899


 15%|█▌        | 300/2000 [8:14:29<51:38:29, 109.36s/it]

Training_loss 0.58362,   Accuracy 0.76809


 15%|█▌        | 301/2000 [8:16:19<51:39:07, 109.45s/it]

Training_loss 0.58377,   Accuracy 0.76899


 15%|█▌        | 302/2000 [8:18:12<52:08:38, 110.55s/it]

Training_loss 0.58359,   Accuracy 0.76899


 15%|█▌        | 303/2000 [8:20:02<52:01:47, 110.38s/it]

Training_loss 0.58340,   Accuracy 0.76812


 15%|█▌        | 304/2000 [8:21:48<51:22:30, 109.05s/it]

Training_loss 0.58298,   Accuracy 0.76815


 15%|█▌        | 305/2000 [8:23:31<50:35:11, 107.44s/it]

Training_loss 0.58271,   Accuracy 0.76815


 15%|█▌        | 306/2000 [8:25:16<50:14:25, 106.77s/it]

Training_loss 0.58293,   Accuracy 0.76896


 15%|█▌        | 307/2000 [8:27:06<50:32:34, 107.47s/it]

Training_loss 0.58276,   Accuracy 0.76812


 15%|█▌        | 308/2000 [8:28:49<49:56:37, 106.26s/it]

Training_loss 0.58270,   Accuracy 0.76809


 15%|█▌        | 309/2000 [8:30:36<49:59:09, 106.42s/it]

Training_loss 0.58275,   Accuracy 0.76896


 16%|█▌        | 310/2000 [8:32:20<49:40:46, 105.83s/it]

Training_loss 0.58266,   Accuracy 0.76903


 16%|█▌        | 311/2000 [8:34:04<49:21:57, 105.22s/it]

Training_loss 0.58274,   Accuracy 0.76906


 16%|█▌        | 312/2000 [8:35:49<49:17:29, 105.12s/it]

Training_loss 0.58240,   Accuracy 0.76899


 16%|█▌        | 313/2000 [8:37:37<49:36:55, 105.88s/it]

Training_loss 0.58233,   Accuracy 0.76903


 16%|█▌        | 314/2000 [8:39:22<49:34:41, 105.86s/it]

Training_loss 0.58246,   Accuracy 0.76906


 16%|█▌        | 315/2000 [8:41:12<50:04:18, 106.98s/it]

Training_loss 0.58238,   Accuracy 0.76906


 16%|█▌        | 316/2000 [8:43:09<51:26:32, 109.97s/it]

Training_loss 0.58228,   Accuracy 0.76906


 16%|█▌        | 317/2000 [8:44:53<50:32:18, 108.10s/it]

Training_loss 0.58217,   Accuracy 0.76906


 16%|█▌        | 318/2000 [8:46:37<50:01:29, 107.07s/it]

Training_loss 0.58190,   Accuracy 0.76903


 16%|█▌        | 319/2000 [8:48:21<49:31:37, 106.07s/it]

Training_loss 0.58180,   Accuracy 0.76903


 16%|█▌        | 320/2000 [8:50:09<49:44:49, 106.60s/it]

Training_loss 0.58179,   Accuracy 0.76906


 16%|█▌        | 321/2000 [8:51:55<49:38:09, 106.43s/it]

Training_loss 0.58143,   Accuracy 0.76903


 16%|█▌        | 322/2000 [8:53:41<49:35:28, 106.39s/it]

Training_loss 0.58139,   Accuracy 0.76903


 16%|█▌        | 323/2000 [8:55:28<49:33:27, 106.38s/it]

Training_loss 0.58132,   Accuracy 0.76896


 16%|█▌        | 324/2000 [8:57:15<49:40:41, 106.71s/it]

Training_loss 0.58128,   Accuracy 0.76906


 16%|█▋        | 325/2000 [8:58:59<49:15:24, 105.87s/it]

Training_loss 0.58123,   Accuracy 0.76896


 16%|█▋        | 326/2000 [9:00:48<49:43:50, 106.95s/it]

Training_loss 0.58101,   Accuracy 0.76896


 16%|█▋        | 327/2000 [9:02:38<50:06:38, 107.83s/it]

Training_loss 0.58064,   Accuracy 0.76793


 16%|█▋        | 328/2000 [9:04:22<49:32:08, 106.66s/it]

Training_loss 0.58061,   Accuracy 0.76793


 16%|█▋        | 329/2000 [9:06:08<49:24:43, 106.45s/it]

Training_loss 0.58069,   Accuracy 0.76793


 16%|█▋        | 330/2000 [9:07:57<49:44:51, 107.24s/it]

Training_loss 0.58049,   Accuracy 0.76793


 17%|█▋        | 331/2000 [9:09:43<49:30:06, 106.77s/it]

Training_loss 0.58059,   Accuracy 0.76896


 17%|█▋        | 332/2000 [9:11:32<49:47:15, 107.46s/it]

Training_loss 0.58021,   Accuracy 0.76806


 17%|█▋        | 333/2000 [9:13:18<49:33:00, 107.01s/it]

Training_loss 0.57998,   Accuracy 0.76799


 17%|█▋        | 334/2000 [9:15:01<48:55:01, 105.70s/it]

Training_loss 0.57976,   Accuracy 0.76806


 17%|█▋        | 335/2000 [9:16:47<49:01:24, 106.00s/it]

Training_loss 0.57946,   Accuracy 0.76783


 17%|█▋        | 336/2000 [9:18:34<49:06:48, 106.26s/it]

Training_loss 0.57926,   Accuracy 0.76757


 17%|█▋        | 337/2000 [9:20:21<49:13:18, 106.55s/it]

Training_loss 0.57952,   Accuracy 0.76815


 17%|█▋        | 338/2000 [9:22:07<49:00:17, 106.15s/it]

Training_loss 0.57927,   Accuracy 0.76819


 17%|█▋        | 339/2000 [9:23:53<48:56:30, 106.08s/it]

Training_loss 0.57909,   Accuracy 0.76783


 17%|█▋        | 340/2000 [9:25:36<48:32:15, 105.26s/it]

Training_loss 0.57890,   Accuracy 0.76757


 17%|█▋        | 341/2000 [9:27:28<49:24:52, 107.23s/it]

Training_loss 0.57873,   Accuracy 0.76748


 17%|█▋        | 342/2000 [9:29:13<49:04:32, 106.56s/it]

Training_loss 0.57887,   Accuracy 0.76783


 17%|█▋        | 343/2000 [9:30:59<48:59:51, 106.45s/it]

Training_loss 0.57890,   Accuracy 0.76783


 17%|█▋        | 344/2000 [9:32:47<49:07:35, 106.80s/it]

Training_loss 0.57888,   Accuracy 0.76793


 17%|█▋        | 345/2000 [9:34:30<48:39:03, 105.83s/it]

Training_loss 0.57866,   Accuracy 0.76757


 17%|█▋        | 346/2000 [9:36:14<48:23:43, 105.33s/it]

Training_loss 0.57853,   Accuracy 0.76754


 17%|█▋        | 347/2000 [9:38:00<48:24:07, 105.41s/it]

Training_loss 0.57837,   Accuracy 0.76754


 17%|█▋        | 348/2000 [9:39:47<48:36:55, 105.94s/it]

Training_loss 0.57849,   Accuracy 0.76761


 17%|█▋        | 349/2000 [9:41:32<48:25:43, 105.60s/it]

Training_loss 0.57842,   Accuracy 0.76786


 18%|█▊        | 350/2000 [9:43:22<49:05:00, 107.09s/it]

Training_loss 0.57852,   Accuracy 0.76796


 18%|█▊        | 351/2000 [9:45:06<48:36:29, 106.12s/it]

Training_loss 0.57852,   Accuracy 0.76799


 18%|█▊        | 352/2000 [9:46:53<48:38:16, 106.25s/it]

Training_loss 0.57836,   Accuracy 0.76803


 18%|█▊        | 353/2000 [9:48:36<48:14:08, 105.43s/it]

Training_loss 0.57837,   Accuracy 0.76890


 18%|█▊        | 354/2000 [9:50:24<48:31:39, 106.14s/it]

Training_loss 0.57832,   Accuracy 0.76893


 18%|█▊        | 355/2000 [9:52:11<48:37:03, 106.40s/it]

Training_loss 0.57852,   Accuracy 0.76932


 18%|█▊        | 356/2000 [9:53:54<48:09:13, 105.45s/it]

Training_loss 0.57856,   Accuracy 0.76951


 18%|█▊        | 357/2000 [9:55:42<48:23:04, 106.02s/it]

Training_loss 0.57825,   Accuracy 0.76932


 18%|█▊        | 358/2000 [9:57:35<49:21:25, 108.21s/it]

Training_loss 0.57826,   Accuracy 0.76932


 18%|█▊        | 359/2000 [9:59:18<48:33:48, 106.54s/it]

Training_loss 0.57828,   Accuracy 0.76954


 18%|█▊        | 360/2000 [10:01:09<49:13:34, 108.06s/it]

Training_loss 0.57803,   Accuracy 0.76929


 18%|█▊        | 361/2000 [10:03:15<51:37:29, 113.39s/it]

Training_loss 0.57780,   Accuracy 0.76938


 18%|█▊        | 362/2000 [10:05:28<54:10:53, 119.08s/it]

Training_loss 0.57778,   Accuracy 0.76938


 18%|█▊        | 363/2000 [10:08:01<58:54:19, 129.54s/it]

Training_loss 0.57750,   Accuracy 0.76909


 18%|█▊        | 364/2000 [10:10:04<57:58:27, 127.57s/it]

Training_loss 0.57716,   Accuracy 0.76883


 18%|█▊        | 365/2000 [10:11:51<55:02:58, 121.21s/it]

Training_loss 0.57705,   Accuracy 0.76899


 18%|█▊        | 366/2000 [10:13:43<53:48:46, 118.56s/it]

Training_loss 0.57722,   Accuracy 0.76916


 18%|█▊        | 367/2000 [10:15:29<52:01:05, 114.68s/it]

Training_loss 0.57710,   Accuracy 0.76935


 18%|█▊        | 368/2000 [10:17:15<50:50:52, 112.16s/it]

Training_loss 0.57695,   Accuracy 0.76899


 18%|█▊        | 369/2000 [10:19:02<50:03:41, 110.50s/it]

Training_loss 0.57686,   Accuracy 0.76899


 18%|█▊        | 370/2000 [10:20:51<49:53:55, 110.21s/it]

Training_loss 0.57669,   Accuracy 0.76861


 19%|█▊        | 371/2000 [10:22:38<49:22:17, 109.11s/it]

Training_loss 0.57644,   Accuracy 0.76770


 19%|█▊        | 372/2000 [10:24:21<48:35:28, 107.45s/it]

Training_loss 0.57646,   Accuracy 0.76861


 19%|█▊        | 373/2000 [10:26:06<48:08:38, 106.53s/it]

Training_loss 0.57649,   Accuracy 0.76899


 19%|█▊        | 374/2000 [10:27:54<48:18:21, 106.95s/it]

Training_loss 0.57657,   Accuracy 0.76906


 19%|█▉        | 375/2000 [10:29:41<48:18:04, 107.01s/it]

Training_loss 0.57635,   Accuracy 0.76906


 19%|█▉        | 376/2000 [10:31:35<49:15:12, 109.18s/it]

Training_loss 0.57644,   Accuracy 0.76954


 19%|█▉        | 377/2000 [10:33:26<49:30:07, 109.80s/it]

Training_loss 0.57643,   Accuracy 0.76951


 19%|█▉        | 378/2000 [10:35:15<49:20:20, 109.51s/it]

Training_loss 0.57623,   Accuracy 0.76954


 19%|█▉        | 379/2000 [10:37:08<49:48:55, 110.63s/it]

Training_loss 0.57610,   Accuracy 0.76954


 19%|█▉        | 380/2000 [10:38:57<49:32:49, 110.10s/it]

Training_loss 0.57611,   Accuracy 0.76954


 19%|█▉        | 381/2000 [10:40:42<48:48:41, 108.54s/it]

Training_loss 0.57602,   Accuracy 0.76961


 19%|█▉        | 382/2000 [10:42:27<48:19:47, 107.53s/it]

Training_loss 0.57588,   Accuracy 0.76951


 19%|█▉        | 383/2000 [10:44:18<48:39:46, 108.34s/it]

Training_loss 0.57604,   Accuracy 0.76958


 19%|█▉        | 384/2000 [10:46:11<49:22:24, 109.99s/it]

Training_loss 0.57580,   Accuracy 0.76951


 19%|█▉        | 385/2000 [10:48:08<50:17:37, 112.11s/it]

Training_loss 0.57535,   Accuracy 0.76929


 19%|█▉        | 386/2000 [10:49:55<49:33:05, 110.52s/it]

Training_loss 0.57524,   Accuracy 0.76909


 19%|█▉        | 387/2000 [10:51:39<48:38:16, 108.55s/it]

Training_loss 0.57517,   Accuracy 0.76929


 19%|█▉        | 388/2000 [10:53:24<48:09:47, 107.56s/it]

Training_loss 0.57503,   Accuracy 0.76909


 19%|█▉        | 389/2000 [10:55:07<47:30:41, 106.17s/it]

Training_loss 0.57501,   Accuracy 0.76887


 20%|█▉        | 390/2000 [10:56:55<47:38:10, 106.52s/it]

Training_loss 0.57494,   Accuracy 0.76887


 20%|█▉        | 391/2000 [10:58:38<47:10:49, 105.56s/it]

Training_loss 0.57490,   Accuracy 0.76896


 20%|█▉        | 392/2000 [11:00:24<47:10:02, 105.60s/it]

Training_loss 0.57517,   Accuracy 0.76951


 20%|█▉        | 393/2000 [11:02:15<47:53:07, 107.27s/it]

Training_loss 0.57519,   Accuracy 0.76964


 20%|█▉        | 394/2000 [11:04:11<49:04:31, 110.01s/it]

Training_loss 0.57512,   Accuracy 0.76964


 20%|█▉        | 395/2000 [11:06:10<50:13:42, 112.66s/it]

Training_loss 0.57521,   Accuracy 0.76964


 20%|█▉        | 396/2000 [11:08:05<50:26:07, 113.20s/it]

Training_loss 0.57507,   Accuracy 0.76967


 20%|█▉        | 397/2000 [11:09:50<49:25:09, 110.99s/it]

Training_loss 0.57502,   Accuracy 0.76961


 20%|█▉        | 398/2000 [11:11:35<48:28:59, 108.95s/it]

Training_loss 0.57499,   Accuracy 0.76980


 20%|█▉        | 399/2000 [11:13:22<48:17:47, 108.60s/it]

Training_loss 0.57505,   Accuracy 0.76964


 20%|██        | 400/2000 [11:15:05<47:24:33, 106.67s/it]

Training_loss 0.57493,   Accuracy 0.76964


 20%|██        | 401/2000 [11:16:51<47:23:35, 106.70s/it]

Training_loss 0.57467,   Accuracy 0.76951


 20%|██        | 402/2000 [11:18:37<47:09:47, 106.25s/it]

Training_loss 0.57474,   Accuracy 0.76964


 20%|██        | 403/2000 [11:20:25<47:27:45, 106.99s/it]

Training_loss 0.57443,   Accuracy 0.76954


 20%|██        | 404/2000 [11:22:14<47:40:12, 107.53s/it]

Training_loss 0.57440,   Accuracy 0.76951


 20%|██        | 405/2000 [11:23:59<47:18:11, 106.77s/it]

Training_loss 0.57425,   Accuracy 0.76925


 20%|██        | 406/2000 [11:25:42<46:42:57, 105.51s/it]

Training_loss 0.57421,   Accuracy 0.76925


 20%|██        | 407/2000 [11:27:30<47:00:35, 106.24s/it]

Training_loss 0.57414,   Accuracy 0.76925


 20%|██        | 408/2000 [11:29:16<47:03:35, 106.42s/it]

Training_loss 0.57394,   Accuracy 0.76890


 20%|██        | 409/2000 [11:31:01<46:47:32, 105.88s/it]

Training_loss 0.57369,   Accuracy 0.76883


 20%|██        | 410/2000 [11:32:48<46:56:22, 106.28s/it]

Training_loss 0.57370,   Accuracy 0.76899


 21%|██        | 411/2000 [11:34:44<48:06:04, 108.98s/it]

Training_loss 0.57352,   Accuracy 0.76929


 21%|██        | 412/2000 [11:36:27<47:23:52, 107.45s/it]

Training_loss 0.57339,   Accuracy 0.76929


 21%|██        | 413/2000 [11:38:19<47:54:57, 108.69s/it]

Training_loss 0.57327,   Accuracy 0.76887


 21%|██        | 414/2000 [11:40:05<47:28:22, 107.76s/it]

Training_loss 0.57327,   Accuracy 0.76899


 21%|██        | 415/2000 [11:41:52<47:26:21, 107.75s/it]

Training_loss 0.57296,   Accuracy 0.76867


 21%|██        | 416/2000 [11:43:44<47:53:22, 108.84s/it]

Training_loss 0.57292,   Accuracy 0.76867


 21%|██        | 417/2000 [11:45:31<47:37:35, 108.31s/it]

Training_loss 0.57260,   Accuracy 0.76809


 21%|██        | 418/2000 [11:47:15<47:05:00, 107.14s/it]

Training_loss 0.57246,   Accuracy 0.76809


 21%|██        | 419/2000 [11:49:00<46:46:32, 106.51s/it]

Training_loss 0.57230,   Accuracy 0.76780


 21%|██        | 420/2000 [11:50:47<46:45:28, 106.54s/it]

Training_loss 0.57220,   Accuracy 0.76783


 21%|██        | 421/2000 [11:52:32<46:34:04, 106.17s/it]

Training_loss 0.57207,   Accuracy 0.76790


 21%|██        | 422/2000 [11:54:16<46:17:42, 105.62s/it]

Training_loss 0.57196,   Accuracy 0.76806


 21%|██        | 423/2000 [11:56:09<47:12:25, 107.76s/it]

Training_loss 0.57184,   Accuracy 0.76806


 21%|██        | 424/2000 [11:58:13<49:14:18, 112.47s/it]

Training_loss 0.57158,   Accuracy 0.76680


 21%|██▏       | 425/2000 [12:00:11<49:56:48, 114.16s/it]

Training_loss 0.57153,   Accuracy 0.76680


 21%|██▏       | 426/2000 [12:02:13<50:56:45, 116.52s/it]

Training_loss 0.57128,   Accuracy 0.76683


 21%|██▏       | 427/2000 [12:04:27<53:14:44, 121.86s/it]

Training_loss 0.57119,   Accuracy 0.76689


 21%|██▏       | 428/2000 [12:06:53<56:21:58, 129.08s/it]

Training_loss 0.57118,   Accuracy 0.76680


 21%|██▏       | 429/2000 [12:09:11<57:26:52, 131.64s/it]

Training_loss 0.57117,   Accuracy 0.76793


 22%|██▏       | 430/2000 [12:11:19<56:57:37, 130.61s/it]

Training_loss 0.57127,   Accuracy 0.76793


 22%|██▏       | 431/2000 [12:13:23<56:08:19, 128.81s/it]

Training_loss 0.57108,   Accuracy 0.76793


 22%|██▏       | 432/2000 [12:15:17<54:06:26, 124.23s/it]

Training_loss 0.57092,   Accuracy 0.76767


 22%|██▏       | 433/2000 [12:17:09<52:28:50, 120.57s/it]

Training_loss 0.57094,   Accuracy 0.76793


 22%|██▏       | 434/2000 [12:18:59<51:06:16, 117.48s/it]

Training_loss 0.57080,   Accuracy 0.76786


 22%|██▏       | 435/2000 [12:20:55<50:53:37, 117.07s/it]

Training_loss 0.57058,   Accuracy 0.76693


 22%|██▏       | 436/2000 [12:22:49<50:25:48, 116.08s/it]

Training_loss 0.57057,   Accuracy 0.76786


 22%|██▏       | 437/2000 [12:24:46<50:26:32, 116.18s/it]

Training_loss 0.57043,   Accuracy 0.76689


 22%|██▏       | 438/2000 [12:27:00<52:46:47, 121.64s/it]

Training_loss 0.57036,   Accuracy 0.76709


 22%|██▏       | 439/2000 [12:29:27<56:06:11, 129.39s/it]

Training_loss 0.57038,   Accuracy 0.76709


 22%|██▏       | 440/2000 [12:31:36<55:59:59, 129.23s/it]

Training_loss 0.57036,   Accuracy 0.76709


 22%|██▏       | 441/2000 [12:33:43<55:35:22, 128.37s/it]

Training_loss 0.57025,   Accuracy 0.76693


 22%|██▏       | 442/2000 [12:36:13<58:24:51, 134.98s/it]

Training_loss 0.57017,   Accuracy 0.76709


 22%|██▏       | 443/2000 [12:38:26<58:06:51, 134.37s/it]

Training_loss 0.57046,   Accuracy 0.76799


 22%|██▏       | 444/2000 [12:40:29<56:36:56, 130.99s/it]

Training_loss 0.57065,   Accuracy 0.76845


 22%|██▏       | 445/2000 [12:42:20<53:56:43, 124.89s/it]

Training_loss 0.57071,   Accuracy 0.76890


 22%|██▏       | 446/2000 [12:44:05<51:20:11, 118.93s/it]

Training_loss 0.57088,   Accuracy 0.76945


 22%|██▏       | 447/2000 [12:45:50<49:30:48, 114.78s/it]

Training_loss 0.57101,   Accuracy 0.76993


 22%|██▏       | 448/2000 [12:47:47<49:46:36, 115.46s/it]

Training_loss 0.57102,   Accuracy 0.77045


 22%|██▏       | 449/2000 [12:49:47<50:18:19, 116.76s/it]

Training_loss 0.57100,   Accuracy 0.77055


 22%|██▎       | 450/2000 [12:51:36<49:15:10, 114.39s/it]

Training_loss 0.57086,   Accuracy 0.77058


 23%|██▎       | 451/2000 [12:53:23<48:17:17, 112.23s/it]

Training_loss 0.57060,   Accuracy 0.77006


 23%|██▎       | 452/2000 [12:55:07<47:14:12, 109.85s/it]

Training_loss 0.57064,   Accuracy 0.77045


 23%|██▎       | 453/2000 [12:56:56<47:05:55, 109.60s/it]

Training_loss 0.57044,   Accuracy 0.77019


 23%|██▎       | 454/2000 [12:58:42<46:34:05, 108.44s/it]

Training_loss 0.57043,   Accuracy 0.77000


 23%|██▎       | 455/2000 [13:00:32<46:45:09, 108.94s/it]

Training_loss 0.57048,   Accuracy 0.77022


 23%|██▎       | 456/2000 [13:02:24<47:10:40, 110.00s/it]

Training_loss 0.57042,   Accuracy 0.77022


 23%|██▎       | 457/2000 [13:04:20<47:55:30, 111.82s/it]

Training_loss 0.57004,   Accuracy 0.76996


 23%|██▎       | 458/2000 [13:06:22<49:08:43, 114.74s/it]

Training_loss 0.57002,   Accuracy 0.76945


 23%|██▎       | 459/2000 [13:08:29<50:37:30, 118.27s/it]

Training_loss 0.57008,   Accuracy 0.77003


 23%|██▎       | 460/2000 [13:10:15<49:01:16, 114.60s/it]

Training_loss 0.56982,   Accuracy 0.76987


 23%|██▎       | 461/2000 [13:12:05<48:29:40, 113.44s/it]

Training_loss 0.56987,   Accuracy 0.77003


 23%|██▎       | 462/2000 [13:13:55<48:01:38, 112.42s/it]

Training_loss 0.56994,   Accuracy 0.77006


 23%|██▎       | 463/2000 [13:15:43<47:26:23, 111.12s/it]

Training_loss 0.56993,   Accuracy 0.77009


 23%|██▎       | 464/2000 [13:17:30<46:52:04, 109.85s/it]

Training_loss 0.56984,   Accuracy 0.77016


 23%|██▎       | 465/2000 [13:19:14<46:01:53, 107.96s/it]

Training_loss 0.56977,   Accuracy 0.77009


 23%|██▎       | 466/2000 [13:21:00<45:48:52, 107.52s/it]

Training_loss 0.56973,   Accuracy 0.77006


 23%|██▎       | 467/2000 [13:22:45<45:26:22, 106.71s/it]

Training_loss 0.56945,   Accuracy 0.77013


 23%|██▎       | 468/2000 [13:24:29<45:04:32, 105.92s/it]

Training_loss 0.56939,   Accuracy 0.77013


 23%|██▎       | 469/2000 [13:26:15<45:04:00, 105.97s/it]

Training_loss 0.56936,   Accuracy 0.77013


 24%|██▎       | 470/2000 [13:28:03<45:18:48, 106.62s/it]

Training_loss 0.56917,   Accuracy 0.77016


 24%|██▎       | 471/2000 [13:29:51<45:21:07, 106.78s/it]

Training_loss 0.56929,   Accuracy 0.77013


 24%|██▎       | 472/2000 [13:31:39<45:33:37, 107.34s/it]

Training_loss 0.56920,   Accuracy 0.77016


 24%|██▎       | 473/2000 [13:33:24<45:13:17, 106.61s/it]

Training_loss 0.56943,   Accuracy 0.77042


 24%|██▎       | 474/2000 [13:35:09<44:56:07, 106.01s/it]

Training_loss 0.56937,   Accuracy 0.77074


 24%|██▍       | 475/2000 [13:36:53<44:43:06, 105.56s/it]

Training_loss 0.56951,   Accuracy 0.77090


 24%|██▍       | 476/2000 [13:38:39<44:42:48, 105.62s/it]

Training_loss 0.56948,   Accuracy 0.77109


 24%|██▍       | 477/2000 [13:40:26<44:52:11, 106.06s/it]

Training_loss 0.56956,   Accuracy 0.77197


 24%|██▍       | 478/2000 [13:42:14<45:07:03, 106.72s/it]

Training_loss 0.56938,   Accuracy 0.77190


 24%|██▍       | 479/2000 [13:43:59<44:49:40, 106.10s/it]

Training_loss 0.56926,   Accuracy 0.77132


 24%|██▍       | 480/2000 [13:45:45<44:47:57, 106.10s/it]

Training_loss 0.56893,   Accuracy 0.77084


 24%|██▍       | 481/2000 [13:47:29<44:32:17, 105.55s/it]

Training_loss 0.56872,   Accuracy 0.77051


 24%|██▍       | 482/2000 [13:49:13<44:11:49, 104.82s/it]

Training_loss 0.56874,   Accuracy 0.77093


 24%|██▍       | 483/2000 [13:51:00<44:27:25, 105.50s/it]

Training_loss 0.56882,   Accuracy 0.77190


 24%|██▍       | 484/2000 [13:52:48<44:46:00, 106.31s/it]

Training_loss 0.56860,   Accuracy 0.77106


 24%|██▍       | 485/2000 [13:54:38<45:11:29, 107.39s/it]

Training_loss 0.56851,   Accuracy 0.77161


 24%|██▍       | 486/2000 [13:56:23<44:56:31, 106.86s/it]

Training_loss 0.56849,   Accuracy 0.77161


 24%|██▍       | 487/2000 [13:58:21<46:13:42, 109.99s/it]

Training_loss 0.56854,   Accuracy 0.77184


 24%|██▍       | 488/2000 [14:00:21<47:29:47, 113.09s/it]

Training_loss 0.56859,   Accuracy 0.77190


 24%|██▍       | 489/2000 [14:02:26<48:57:35, 116.65s/it]

Training_loss 0.56861,   Accuracy 0.77197


 24%|██▍       | 490/2000 [14:04:28<49:34:05, 118.18s/it]

Training_loss 0.56866,   Accuracy 0.77187


 25%|██▍       | 491/2000 [14:06:28<49:50:59, 118.93s/it]

Training_loss 0.56839,   Accuracy 0.77158


 25%|██▍       | 492/2000 [14:08:21<49:03:14, 117.10s/it]

Training_loss 0.56823,   Accuracy 0.77158


 25%|██▍       | 493/2000 [14:10:07<47:39:29, 113.85s/it]

Training_loss 0.56827,   Accuracy 0.77161


 25%|██▍       | 494/2000 [14:11:55<46:48:15, 111.88s/it]

Training_loss 0.56820,   Accuracy 0.77161


 25%|██▍       | 495/2000 [14:13:39<45:48:51, 109.59s/it]

Training_loss 0.56807,   Accuracy 0.77145


 25%|██▍       | 496/2000 [14:15:29<45:52:50, 109.82s/it]

Training_loss 0.56800,   Accuracy 0.77168


 25%|██▍       | 497/2000 [14:17:32<47:27:49, 113.69s/it]

Training_loss 0.56807,   Accuracy 0.77148


 25%|██▍       | 498/2000 [14:19:31<48:02:26, 115.14s/it]

Training_loss 0.56800,   Accuracy 0.77148


 25%|██▍       | 499/2000 [14:21:41<49:53:56, 119.68s/it]

Training_loss 0.56794,   Accuracy 0.77148


 25%|██▌       | 500/2000 [14:23:33<48:57:58, 117.52s/it]

Training_loss 0.56779,   Accuracy 0.77148


 25%|██▌       | 501/2000 [14:25:19<47:24:33, 113.86s/it]

Training_loss 0.56779,   Accuracy 0.77155


 25%|██▌       | 502/2000 [14:27:14<47:34:34, 114.34s/it]

Training_loss 0.56768,   Accuracy 0.77151


 25%|██▌       | 503/2000 [14:29:03<46:48:35, 112.57s/it]

Training_loss 0.56770,   Accuracy 0.77193


 25%|██▌       | 504/2000 [14:30:50<46:11:38, 111.16s/it]

Training_loss 0.56759,   Accuracy 0.77168


 25%|██▌       | 505/2000 [14:32:36<45:27:37, 109.47s/it]

Training_loss 0.56773,   Accuracy 0.77245


 25%|██▌       | 506/2000 [14:34:28<45:47:04, 110.32s/it]

Training_loss 0.56786,   Accuracy 0.77248


 25%|██▌       | 507/2000 [14:36:12<44:54:20, 108.28s/it]

Training_loss 0.56793,   Accuracy 0.77281


 25%|██▌       | 508/2000 [14:37:56<44:23:25, 107.11s/it]

Training_loss 0.56765,   Accuracy 0.77274


 25%|██▌       | 509/2000 [14:39:43<44:16:37, 106.91s/it]

Training_loss 0.56759,   Accuracy 0.77287


 26%|██▌       | 510/2000 [14:41:28<44:01:39, 106.38s/it]

Training_loss 0.56744,   Accuracy 0.77287


 26%|██▌       | 511/2000 [14:43:18<44:28:36, 107.53s/it]

Training_loss 0.56743,   Accuracy 0.77287


 26%|██▌       | 512/2000 [14:45:07<44:41:40, 108.13s/it]

Training_loss 0.56748,   Accuracy 0.77287


 26%|██▌       | 513/2000 [14:46:52<44:12:16, 107.02s/it]

Training_loss 0.56741,   Accuracy 0.77297


 26%|██▌       | 514/2000 [14:48:41<44:26:28, 107.66s/it]

Training_loss 0.56734,   Accuracy 0.77300


 26%|██▌       | 515/2000 [14:50:33<44:55:43, 108.92s/it]

Training_loss 0.56735,   Accuracy 0.77303


 26%|██▌       | 516/2000 [14:52:18<44:24:34, 107.73s/it]

Training_loss 0.56728,   Accuracy 0.77306


 26%|██▌       | 517/2000 [14:54:04<44:12:26, 107.31s/it]

Training_loss 0.56717,   Accuracy 0.77303


 26%|██▌       | 518/2000 [14:55:50<43:57:36, 106.79s/it]

Training_loss 0.56709,   Accuracy 0.77303


 26%|██▌       | 519/2000 [14:57:40<44:19:01, 107.73s/it]

Training_loss 0.56704,   Accuracy 0.77316


 26%|██▌       | 520/2000 [14:59:25<43:59:32, 107.01s/it]

Training_loss 0.56693,   Accuracy 0.77319


 26%|██▌       | 521/2000 [15:01:11<43:47:44, 106.60s/it]

Training_loss 0.56699,   Accuracy 0.77352


 26%|██▌       | 522/2000 [15:02:59<43:59:03, 107.13s/it]

Training_loss 0.56693,   Accuracy 0.77352


 26%|██▌       | 523/2000 [15:04:43<43:31:46, 106.10s/it]

Training_loss 0.56686,   Accuracy 0.77352


 26%|██▌       | 524/2000 [15:06:29<43:31:18, 106.15s/it]

Training_loss 0.56701,   Accuracy 0.77390


 26%|██▋       | 525/2000 [15:08:18<43:50:54, 107.02s/it]

Training_loss 0.56710,   Accuracy 0.77381


 26%|██▋       | 526/2000 [15:10:05<43:51:32, 107.12s/it]

Training_loss 0.56701,   Accuracy 0.77390


 26%|██▋       | 527/2000 [15:11:53<43:55:27, 107.35s/it]

Training_loss 0.56669,   Accuracy 0.77361


 26%|██▋       | 528/2000 [15:13:39<43:42:13, 106.88s/it]

Training_loss 0.56650,   Accuracy 0.77358


 26%|██▋       | 529/2000 [15:15:24<43:22:22, 106.15s/it]

Training_loss 0.56658,   Accuracy 0.77348


 26%|██▋       | 530/2000 [15:17:08<43:11:41, 105.78s/it]

Training_loss 0.56647,   Accuracy 0.77381


 27%|██▋       | 531/2000 [15:18:52<42:56:23, 105.23s/it]

Training_loss 0.56652,   Accuracy 0.77381


 27%|██▋       | 532/2000 [15:20:39<43:07:59, 105.78s/it]

Training_loss 0.56638,   Accuracy 0.77374


 27%|██▋       | 533/2000 [15:22:26<43:09:37, 105.91s/it]

Training_loss 0.56623,   Accuracy 0.77361


 27%|██▋       | 534/2000 [15:24:11<43:03:25, 105.73s/it]

Training_loss 0.56606,   Accuracy 0.77348


 27%|██▋       | 535/2000 [15:25:55<42:52:03, 105.34s/it]

Training_loss 0.56626,   Accuracy 0.77381


 27%|██▋       | 536/2000 [15:27:43<43:06:05, 105.99s/it]

Training_loss 0.56626,   Accuracy 0.77381


 27%|██▋       | 537/2000 [15:29:26<42:44:56, 105.19s/it]

Training_loss 0.56625,   Accuracy 0.77381


 27%|██▋       | 538/2000 [15:31:11<42:39:54, 105.06s/it]

Training_loss 0.56625,   Accuracy 0.77358


 27%|██▋       | 539/2000 [15:32:58<42:49:07, 105.51s/it]

Training_loss 0.56615,   Accuracy 0.77348


 27%|██▋       | 540/2000 [15:34:42<42:40:18, 105.22s/it]

Training_loss 0.56596,   Accuracy 0.77348


 27%|██▋       | 541/2000 [15:36:25<42:24:37, 104.65s/it]

Training_loss 0.56598,   Accuracy 0.77348


 27%|██▋       | 542/2000 [15:38:13<42:44:07, 105.52s/it]

Training_loss 0.56570,   Accuracy 0.77342


 27%|██▋       | 543/2000 [15:39:59<42:45:38, 105.65s/it]

Training_loss 0.56550,   Accuracy 0.77342


 27%|██▋       | 544/2000 [15:41:43<42:34:25, 105.26s/it]

Training_loss 0.56565,   Accuracy 0.77345


 27%|██▋       | 545/2000 [15:43:30<42:42:41, 105.68s/it]

Training_loss 0.56549,   Accuracy 0.77345


 27%|██▋       | 546/2000 [15:45:16<42:41:07, 105.69s/it]

Training_loss 0.56544,   Accuracy 0.77342


 27%|██▋       | 547/2000 [15:47:03<42:54:05, 106.29s/it]

Training_loss 0.56535,   Accuracy 0.77339


 27%|██▋       | 548/2000 [15:48:48<42:38:42, 105.73s/it]

Training_loss 0.56528,   Accuracy 0.77342


 27%|██▋       | 549/2000 [15:50:47<44:16:56, 109.87s/it]

Training_loss 0.56535,   Accuracy 0.77345


 28%|██▊       | 550/2000 [15:52:31<43:32:54, 108.12s/it]

Training_loss 0.56547,   Accuracy 0.77381


 28%|██▊       | 551/2000 [15:54:17<43:15:55, 107.49s/it]

Training_loss 0.56543,   Accuracy 0.77374


 28%|██▊       | 552/2000 [15:56:04<43:07:26, 107.21s/it]

Training_loss 0.56538,   Accuracy 0.77352


 28%|██▊       | 553/2000 [15:57:53<43:16:22, 107.66s/it]

Training_loss 0.56534,   Accuracy 0.77352


 28%|██▊       | 554/2000 [15:59:37<42:50:48, 106.67s/it]

Training_loss 0.56510,   Accuracy 0.77361


 28%|██▊       | 555/2000 [16:01:30<43:36:33, 108.65s/it]

Training_loss 0.56517,   Accuracy 0.77423


 28%|██▊       | 556/2000 [16:03:20<43:42:57, 108.99s/it]

Training_loss 0.56515,   Accuracy 0.77445


 28%|██▊       | 557/2000 [16:05:04<43:04:59, 107.48s/it]

Training_loss 0.56505,   Accuracy 0.77423


 28%|██▊       | 558/2000 [16:06:52<43:06:02, 107.60s/it]

Training_loss 0.56482,   Accuracy 0.77361


 28%|██▊       | 559/2000 [16:08:56<45:01:13, 112.47s/it]

Training_loss 0.56479,   Accuracy 0.77339


 28%|██▊       | 560/2000 [16:10:47<44:52:38, 112.19s/it]

Training_loss 0.56469,   Accuracy 0.77339


 28%|██▊       | 561/2000 [16:12:36<44:27:10, 111.21s/it]

Training_loss 0.56458,   Accuracy 0.77361


 28%|██▊       | 562/2000 [16:14:23<43:52:29, 109.84s/it]

Training_loss 0.56474,   Accuracy 0.77339


 28%|██▊       | 563/2000 [16:16:14<44:03:12, 110.36s/it]

Training_loss 0.56461,   Accuracy 0.77339


 28%|██▊       | 564/2000 [16:18:08<44:25:50, 111.39s/it]

Training_loss 0.56466,   Accuracy 0.77339


 28%|██▊       | 565/2000 [16:20:02<44:43:30, 112.20s/it]

Training_loss 0.56447,   Accuracy 0.77339


 28%|██▊       | 566/2000 [16:21:57<44:56:30, 112.82s/it]

Training_loss 0.56439,   Accuracy 0.77329


 28%|██▊       | 567/2000 [16:23:51<45:05:08, 113.27s/it]

Training_loss 0.56441,   Accuracy 0.77329


 28%|██▊       | 568/2000 [16:25:42<44:45:17, 112.51s/it]

Training_loss 0.56418,   Accuracy 0.77332


 28%|██▊       | 569/2000 [16:27:38<45:14:15, 113.81s/it]

Training_loss 0.56418,   Accuracy 0.77336


 28%|██▊       | 570/2000 [16:29:42<46:23:05, 116.77s/it]

Training_loss 0.56423,   Accuracy 0.77365


 29%|██▊       | 571/2000 [16:31:34<45:49:03, 115.43s/it]

Training_loss 0.56410,   Accuracy 0.77339


 29%|██▊       | 572/2000 [16:33:28<45:31:53, 114.79s/it]

Training_loss 0.56393,   Accuracy 0.77326


 29%|██▊       | 573/2000 [16:35:19<45:07:52, 113.86s/it]

Training_loss 0.56365,   Accuracy 0.77348


 29%|██▊       | 574/2000 [16:37:12<44:54:50, 113.39s/it]

Training_loss 0.56375,   Accuracy 0.77329


 29%|██▉       | 575/2000 [16:39:08<45:15:15, 114.33s/it]

Training_loss 0.56400,   Accuracy 0.77371


 29%|██▉       | 576/2000 [16:40:55<44:19:11, 112.04s/it]

Training_loss 0.56388,   Accuracy 0.77361


 29%|██▉       | 577/2000 [16:42:42<43:43:07, 110.60s/it]

Training_loss 0.56382,   Accuracy 0.77374


 29%|██▉       | 578/2000 [16:44:27<42:58:36, 108.80s/it]

Training_loss 0.56402,   Accuracy 0.77423


 29%|██▉       | 579/2000 [16:46:11<42:24:10, 107.42s/it]

Training_loss 0.56399,   Accuracy 0.77491


 29%|██▉       | 580/2000 [16:47:58<42:17:00, 107.20s/it]

Training_loss 0.56400,   Accuracy 0.77500


 29%|██▉       | 581/2000 [16:49:45<42:18:57, 107.36s/it]

Training_loss 0.56373,   Accuracy 0.77465


 29%|██▉       | 582/2000 [16:51:29<41:49:27, 106.18s/it]

Training_loss 0.56353,   Accuracy 0.77378


 29%|██▉       | 583/2000 [16:53:15<41:45:28, 106.09s/it]

Training_loss 0.56342,   Accuracy 0.77390


 29%|██▉       | 584/2000 [16:55:00<41:35:46, 105.75s/it]

Training_loss 0.56352,   Accuracy 0.77465


 29%|██▉       | 585/2000 [16:56:46<41:35:24, 105.81s/it]

Training_loss 0.56360,   Accuracy 0.77523


 29%|██▉       | 586/2000 [16:58:34<41:51:35, 106.57s/it]

Training_loss 0.56352,   Accuracy 0.77649


 29%|██▉       | 587/2000 [17:00:23<42:07:08, 107.31s/it]

Training_loss 0.56356,   Accuracy 0.77691


 29%|██▉       | 588/2000 [17:02:11<42:10:06, 107.51s/it]

Training_loss 0.56359,   Accuracy 0.77768


 29%|██▉       | 589/2000 [17:04:10<43:28:50, 110.94s/it]

Training_loss 0.56345,   Accuracy 0.77681


 30%|██▉       | 590/2000 [17:06:07<44:11:50, 112.84s/it]

Training_loss 0.56354,   Accuracy 0.77681


 30%|██▉       | 591/2000 [17:08:03<44:29:07, 113.66s/it]

Training_loss 0.56353,   Accuracy 0.77701


 30%|██▉       | 592/2000 [17:10:05<45:30:05, 116.34s/it]

Training_loss 0.56347,   Accuracy 0.77752


 30%|██▉       | 593/2000 [17:12:03<45:37:31, 116.74s/it]

Training_loss 0.56339,   Accuracy 0.77607


 30%|██▉       | 594/2000 [17:14:00<45:34:17, 116.68s/it]

Training_loss 0.56350,   Accuracy 0.77807


 30%|██▉       | 595/2000 [17:15:54<45:15:44, 115.97s/it]

Training_loss 0.56340,   Accuracy 0.77798


 30%|██▉       | 596/2000 [17:17:49<45:09:42, 115.80s/it]

Training_loss 0.56340,   Accuracy 0.77798


 30%|██▉       | 597/2000 [17:19:45<45:08:44, 115.84s/it]

Training_loss 0.56319,   Accuracy 0.77781


 30%|██▉       | 598/2000 [17:21:28<43:34:27, 111.89s/it]

Training_loss 0.56303,   Accuracy 0.77749


 30%|██▉       | 599/2000 [17:23:16<43:09:03, 110.88s/it]

Training_loss 0.56296,   Accuracy 0.77778


 30%|███       | 600/2000 [17:25:02<42:27:06, 109.16s/it]

Training_loss 0.56291,   Accuracy 0.77778


 30%|███       | 601/2000 [17:26:52<42:35:43, 109.61s/it]

Training_loss 0.56284,   Accuracy 0.77804


 30%|███       | 602/2000 [17:28:40<42:20:59, 109.06s/it]

Training_loss 0.56282,   Accuracy 0.77810


 30%|███       | 603/2000 [17:30:27<42:04:24, 108.42s/it]

Training_loss 0.56267,   Accuracy 0.77788


 30%|███       | 604/2000 [17:32:12<41:39:08, 107.41s/it]

Training_loss 0.56276,   Accuracy 0.77804


 30%|███       | 605/2000 [17:33:03<35:07:19, 90.64s/it] 

Training_loss 0.56264,   Accuracy 0.77668


 30%|███       | 606/2000 [17:33:17<26:08:55, 67.53s/it]

Training_loss 0.56266,   Accuracy 0.77891


 30%|███       | 607/2000 [17:33:30<19:44:31, 51.02s/it]

Training_loss 0.56261,   Accuracy 0.77659


 30%|███       | 608/2000 [17:33:44<15:29:50, 40.08s/it]

Training_loss 0.56246,   Accuracy 0.77630


 30%|███       | 609/2000 [17:33:58<12:24:11, 32.10s/it]

Training_loss 0.56216,   Accuracy 0.77487


 30%|███       | 610/2000 [17:34:13<10:25:12, 26.99s/it]

Training_loss 0.56201,   Accuracy 0.77484


 31%|███       | 611/2000 [17:34:27<8:53:39, 23.05s/it] 

Training_loss 0.56195,   Accuracy 0.77465


 31%|███       | 612/2000 [17:34:40<7:48:30, 20.25s/it]

Training_loss 0.56196,   Accuracy 0.77474


 31%|███       | 613/2000 [17:34:55<7:08:07, 18.52s/it]

Training_loss 0.56200,   Accuracy 0.77536


 31%|███       | 614/2000 [17:35:07<6:24:29, 16.64s/it]

Training_loss 0.56192,   Accuracy 0.77504


 31%|███       | 615/2000 [17:35:19<5:52:04, 15.25s/it]

Training_loss 0.56186,   Accuracy 0.77471


 31%|███       | 616/2000 [17:35:33<5:43:44, 14.90s/it]

Training_loss 0.56188,   Accuracy 0.77565


 31%|███       | 617/2000 [17:35:48<5:45:59, 15.01s/it]

Training_loss 0.56187,   Accuracy 0.77571


 31%|███       | 618/2000 [17:36:03<5:43:28, 14.91s/it]

Training_loss 0.56183,   Accuracy 0.77623


 31%|███       | 619/2000 [17:36:17<5:36:14, 14.61s/it]

Training_loss 0.56172,   Accuracy 0.77623


 31%|███       | 620/2000 [17:36:31<5:29:03, 14.31s/it]

Training_loss 0.56166,   Accuracy 0.77633


 31%|███       | 621/2000 [17:36:45<5:27:14, 14.24s/it]

Training_loss 0.56157,   Accuracy 0.77584


 31%|███       | 622/2000 [17:36:58<5:18:02, 13.85s/it]

Training_loss 0.56166,   Accuracy 0.77681


 31%|███       | 623/2000 [17:37:11<5:14:34, 13.71s/it]

Training_loss 0.56158,   Accuracy 0.77662


 31%|███       | 624/2000 [17:37:24<5:13:04, 13.65s/it]

Training_loss 0.56156,   Accuracy 0.77678


 31%|███▏      | 625/2000 [17:37:38<5:12:50, 13.65s/it]

Training_loss 0.56161,   Accuracy 0.77985


 31%|███▏      | 626/2000 [17:37:54<5:27:48, 14.31s/it]

Training_loss 0.56153,   Accuracy 0.77985


 31%|███▏      | 627/2000 [17:38:13<5:58:41, 15.67s/it]

Training_loss 0.56161,   Accuracy 0.77965


 31%|███▏      | 628/2000 [17:38:30<6:10:51, 16.22s/it]

Training_loss 0.56174,   Accuracy 0.77982


 31%|███▏      | 629/2000 [17:38:50<6:36:06, 17.34s/it]

Training_loss 0.56162,   Accuracy 0.77982


 32%|███▏      | 630/2000 [17:39:10<6:54:54, 18.17s/it]

Training_loss 0.56158,   Accuracy 0.77988


 32%|███▏      | 631/2000 [17:39:31<7:13:06, 18.98s/it]

Training_loss 0.56159,   Accuracy 0.78030


 32%|███▏      | 632/2000 [17:40:02<8:36:01, 22.63s/it]

Training_loss 0.56156,   Accuracy 0.77995


 32%|███▏      | 633/2000 [17:40:29<9:04:34, 23.90s/it]

Training_loss 0.56123,   Accuracy 0.77840


 32%|███▏      | 634/2000 [17:40:56<9:21:43, 24.67s/it]

Training_loss 0.56117,   Accuracy 0.77759


 32%|███▏      | 635/2000 [17:41:28<10:16:25, 27.10s/it]

Training_loss 0.56114,   Accuracy 0.77840


 32%|███▏      | 636/2000 [17:41:56<10:18:25, 27.20s/it]

Training_loss 0.56103,   Accuracy 0.77849


 32%|███▏      | 637/2000 [17:42:20<9:53:09, 26.11s/it] 

Training_loss 0.56092,   Accuracy 0.77856


 32%|███▏      | 638/2000 [17:42:44<9:39:58, 25.55s/it]

Training_loss 0.56099,   Accuracy 0.77991


 32%|███▏      | 639/2000 [17:43:07<9:23:01, 24.82s/it]

Training_loss 0.56088,   Accuracy 0.77859


 32%|███▏      | 640/2000 [17:43:31<9:19:33, 24.69s/it]

Training_loss 0.56079,   Accuracy 0.77865


 32%|███▏      | 641/2000 [17:43:57<9:25:31, 24.97s/it]

Training_loss 0.56063,   Accuracy 0.77778


 32%|███▏      | 642/2000 [17:44:19<9:05:55, 24.12s/it]

Training_loss 0.56072,   Accuracy 0.77836


 32%|███▏      | 643/2000 [17:44:44<9:14:35, 24.52s/it]

Training_loss 0.56071,   Accuracy 0.77836


 32%|███▏      | 644/2000 [17:45:09<9:13:50, 24.51s/it]

Training_loss 0.56075,   Accuracy 0.77885


 32%|███▏      | 645/2000 [17:45:32<9:01:02, 23.96s/it]

Training_loss 0.56079,   Accuracy 0.78040


 32%|███▏      | 646/2000 [17:45:53<8:45:29, 23.29s/it]

Training_loss 0.56079,   Accuracy 0.78037


 32%|███▏      | 647/2000 [17:46:25<9:42:29, 25.83s/it]

Training_loss 0.56069,   Accuracy 0.78004


 32%|███▏      | 648/2000 [17:46:48<9:18:52, 24.80s/it]

Training_loss 0.56066,   Accuracy 0.78014


 32%|███▏      | 649/2000 [17:47:12<9:15:45, 24.68s/it]

Training_loss 0.56070,   Accuracy 0.78046


 32%|███▎      | 650/2000 [17:47:32<8:41:12, 23.16s/it]

Training_loss 0.56056,   Accuracy 0.78014


 33%|███▎      | 651/2000 [17:47:54<8:35:40, 22.94s/it]

Training_loss 0.56048,   Accuracy 0.78014


 33%|███▎      | 652/2000 [17:48:17<8:34:36, 22.91s/it]

Training_loss 0.56030,   Accuracy 0.77882


 33%|███▎      | 653/2000 [17:48:39<8:27:51, 22.62s/it]

Training_loss 0.56027,   Accuracy 0.77869


 33%|███▎      | 654/2000 [17:49:00<8:19:07, 22.25s/it]

Training_loss 0.56026,   Accuracy 0.78124


 33%|███▎      | 655/2000 [17:49:19<7:56:43, 21.27s/it]

Training_loss 0.56015,   Accuracy 0.78114


 33%|███▎      | 656/2000 [17:49:41<7:58:09, 21.35s/it]

Training_loss 0.55995,   Accuracy 0.77946


 33%|███▎      | 657/2000 [17:50:02<7:59:09, 21.41s/it]

Training_loss 0.55984,   Accuracy 0.77943


 33%|███▎      | 658/2000 [17:50:23<7:57:29, 21.35s/it]

Training_loss 0.56002,   Accuracy 0.78169


 33%|███▎      | 659/2000 [17:50:44<7:50:48, 21.07s/it]

Training_loss 0.55988,   Accuracy 0.78169


 33%|███▎      | 660/2000 [17:51:04<7:48:01, 20.96s/it]

Training_loss 0.55981,   Accuracy 0.78169


 33%|███▎      | 661/2000 [17:51:27<7:57:36, 21.40s/it]

Training_loss 0.55987,   Accuracy 0.78175


 33%|███▎      | 662/2000 [17:51:49<8:02:10, 21.62s/it]

Training_loss 0.55995,   Accuracy 0.78185


 33%|███▎      | 663/2000 [17:52:12<8:12:05, 22.08s/it]

Training_loss 0.55995,   Accuracy 0.78227


 33%|███▎      | 664/2000 [17:52:35<8:17:43, 22.35s/it]

Training_loss 0.56001,   Accuracy 0.78311


 33%|███▎      | 665/2000 [17:52:56<8:04:31, 21.78s/it]

Training_loss 0.55992,   Accuracy 0.78308


 33%|███▎      | 666/2000 [17:53:16<7:52:55, 21.27s/it]

Training_loss 0.56007,   Accuracy 0.78301


 33%|███▎      | 667/2000 [17:53:37<7:49:40, 21.14s/it]

Training_loss 0.56017,   Accuracy 0.78301


 33%|███▎      | 668/2000 [17:53:58<7:51:12, 21.23s/it]

Training_loss 0.56038,   Accuracy 0.78379


 33%|███▎      | 669/2000 [17:54:18<7:45:59, 21.01s/it]

Training_loss 0.56021,   Accuracy 0.78321


 34%|███▎      | 670/2000 [17:54:43<8:07:29, 21.99s/it]

Training_loss 0.56045,   Accuracy 0.78398


 34%|███▎      | 671/2000 [17:55:05<8:05:59, 21.94s/it]

Training_loss 0.56042,   Accuracy 0.78382


 34%|███▎      | 672/2000 [17:55:29<8:23:46, 22.76s/it]

Training_loss 0.56048,   Accuracy 0.78408


 34%|███▎      | 673/2000 [17:55:50<8:08:33, 22.09s/it]

Training_loss 0.56036,   Accuracy 0.78402


 34%|███▎      | 674/2000 [17:56:18<8:45:46, 23.79s/it]

Training_loss 0.56035,   Accuracy 0.78402


 34%|███▍      | 675/2000 [17:56:38<8:24:47, 22.86s/it]

Training_loss 0.56036,   Accuracy 0.78415


 34%|███▍      | 676/2000 [17:57:01<8:22:38, 22.78s/it]

Training_loss 0.56054,   Accuracy 0.78447


 34%|███▍      | 677/2000 [17:57:21<8:06:05, 22.04s/it]

Training_loss 0.56057,   Accuracy 0.78447


 34%|███▍      | 678/2000 [17:57:42<7:55:57, 21.60s/it]

Training_loss 0.56029,   Accuracy 0.78447


 34%|███▍      | 679/2000 [17:58:01<7:41:14, 20.95s/it]

Training_loss 0.56034,   Accuracy 0.78453


 34%|███▍      | 680/2000 [17:58:23<7:44:42, 21.12s/it]

Training_loss 0.56051,   Accuracy 0.78592


 34%|███▍      | 681/2000 [17:58:45<7:50:01, 21.38s/it]

Training_loss 0.56050,   Accuracy 0.78615


 34%|███▍      | 682/2000 [17:59:06<7:47:10, 21.27s/it]

Training_loss 0.56057,   Accuracy 0.78670


 34%|███▍      | 683/2000 [17:59:28<7:51:54, 21.50s/it]

Training_loss 0.56072,   Accuracy 0.78721


 34%|███▍      | 684/2000 [17:59:50<7:54:33, 21.64s/it]

Training_loss 0.56075,   Accuracy 0.78744


 34%|███▍      | 685/2000 [18:00:11<7:49:03, 21.40s/it]

Training_loss 0.56066,   Accuracy 0.78747


 34%|███▍      | 686/2000 [18:00:31<7:45:28, 21.25s/it]

Training_loss 0.56074,   Accuracy 0.78854


 34%|███▍      | 687/2000 [18:00:53<7:47:23, 21.36s/it]

Training_loss 0.56077,   Accuracy 0.78860


 34%|███▍      | 688/2000 [18:01:15<7:49:44, 21.48s/it]

Training_loss 0.56080,   Accuracy 0.78860


 34%|███▍      | 689/2000 [18:01:36<7:49:08, 21.47s/it]

Training_loss 0.56073,   Accuracy 0.78860


 34%|███▍      | 690/2000 [18:02:02<8:17:28, 22.79s/it]

Training_loss 0.56069,   Accuracy 0.78860


 35%|███▍      | 691/2000 [18:02:29<8:45:22, 24.08s/it]

Training_loss 0.56049,   Accuracy 0.78857


 35%|███▍      | 692/2000 [18:02:55<8:54:07, 24.50s/it]

Training_loss 0.56053,   Accuracy 0.78899


 35%|███▍      | 693/2000 [18:03:25<9:32:20, 26.27s/it]

Training_loss 0.56062,   Accuracy 0.78828


 35%|███▍      | 694/2000 [18:03:51<9:28:59, 26.14s/it]

Training_loss 0.56050,   Accuracy 0.78847


 35%|███▍      | 695/2000 [18:04:17<9:29:58, 26.21s/it]

Training_loss 0.56074,   Accuracy 0.78802


 35%|███▍      | 696/2000 [18:04:41<9:14:54, 25.53s/it]

Training_loss 0.56085,   Accuracy 0.78783


 35%|███▍      | 697/2000 [18:05:06<9:09:10, 25.29s/it]

Training_loss 0.56082,   Accuracy 0.78780


 35%|███▍      | 698/2000 [18:05:26<8:33:35, 23.67s/it]

Training_loss 0.56078,   Accuracy 0.78786


 35%|███▍      | 699/2000 [18:05:47<8:16:58, 22.92s/it]

Training_loss 0.56079,   Accuracy 0.78786


 35%|███▌      | 700/2000 [18:06:10<8:17:46, 22.97s/it]

Training_loss 0.56071,   Accuracy 0.78773


 35%|███▌      | 701/2000 [18:06:31<8:02:29, 22.29s/it]

Training_loss 0.56070,   Accuracy 0.78776


 35%|███▌      | 702/2000 [18:06:51<7:46:13, 21.55s/it]

Training_loss 0.56067,   Accuracy 0.78783


 35%|███▌      | 703/2000 [18:07:11<7:37:42, 21.17s/it]

Training_loss 0.56072,   Accuracy 0.78783


 35%|███▌      | 704/2000 [18:07:30<7:24:07, 20.56s/it]

Training_loss 0.56058,   Accuracy 0.78792


 35%|███▌      | 705/2000 [18:07:49<7:16:27, 20.22s/it]

Training_loss 0.56033,   Accuracy 0.78776


 35%|███▌      | 706/2000 [18:08:09<7:12:57, 20.08s/it]

Training_loss 0.56032,   Accuracy 0.78783


 35%|███▌      | 707/2000 [18:08:29<7:09:33, 19.93s/it]

Training_loss 0.56047,   Accuracy 0.78828


 35%|███▌      | 708/2000 [18:08:49<7:13:30, 20.13s/it]

Training_loss 0.56039,   Accuracy 0.78831


 35%|███▌      | 709/2000 [18:09:10<7:18:49, 20.39s/it]

Training_loss 0.56022,   Accuracy 0.78831


 36%|███▌      | 710/2000 [18:09:30<7:15:01, 20.23s/it]

Training_loss 0.56034,   Accuracy 0.78844


 36%|███▌      | 711/2000 [18:09:50<7:11:51, 20.10s/it]

Training_loss 0.56044,   Accuracy 0.78902


 36%|███▌      | 712/2000 [18:10:09<7:07:00, 19.89s/it]

Training_loss 0.56010,   Accuracy 0.78834


 36%|███▌      | 713/2000 [18:10:31<7:15:16, 20.29s/it]

Training_loss 0.56013,   Accuracy 0.78841


 36%|███▌      | 714/2000 [18:10:50<7:11:20, 20.12s/it]

Training_loss 0.56005,   Accuracy 0.78841


 36%|███▌      | 715/2000 [18:11:11<7:11:40, 20.16s/it]

Training_loss 0.55998,   Accuracy 0.78834


 36%|███▌      | 716/2000 [18:11:30<7:07:16, 19.97s/it]

Training_loss 0.56001,   Accuracy 0.78834


 36%|███▌      | 717/2000 [18:11:53<7:22:32, 20.70s/it]

Training_loss 0.55988,   Accuracy 0.78809


 36%|███▌      | 718/2000 [18:12:13<7:18:25, 20.52s/it]

Training_loss 0.55973,   Accuracy 0.78792


 36%|███▌      | 719/2000 [18:12:32<7:11:45, 20.22s/it]

Training_loss 0.55990,   Accuracy 0.78815


 36%|███▌      | 720/2000 [18:12:52<7:08:32, 20.09s/it]

Training_loss 0.55972,   Accuracy 0.78767


 36%|███▌      | 721/2000 [18:13:11<7:00:59, 19.75s/it]

Training_loss 0.55977,   Accuracy 0.78767


 36%|███▌      | 722/2000 [18:13:31<7:01:16, 19.78s/it]

Training_loss 0.55971,   Accuracy 0.78767


 36%|███▌      | 723/2000 [18:13:51<7:01:39, 19.81s/it]

Training_loss 0.55966,   Accuracy 0.78767


 36%|███▌      | 724/2000 [18:14:10<6:59:48, 19.74s/it]

Training_loss 0.55956,   Accuracy 0.78773


 36%|███▋      | 725/2000 [18:14:30<6:57:18, 19.64s/it]

Training_loss 0.55980,   Accuracy 0.78773


 36%|███▋      | 726/2000 [18:14:49<6:55:04, 19.55s/it]

Training_loss 0.55975,   Accuracy 0.78773


 36%|███▋      | 727/2000 [18:15:09<6:56:09, 19.61s/it]

Training_loss 0.55976,   Accuracy 0.78767


 36%|███▋      | 728/2000 [18:15:29<7:02:05, 19.91s/it]

Training_loss 0.55970,   Accuracy 0.78770


 36%|███▋      | 729/2000 [18:15:49<6:58:09, 19.74s/it]

Training_loss 0.55972,   Accuracy 0.78767


 36%|███▋      | 730/2000 [18:16:08<6:54:07, 19.56s/it]

Training_loss 0.55997,   Accuracy 0.78844


 37%|███▋      | 731/2000 [18:16:27<6:52:23, 19.50s/it]

Training_loss 0.55984,   Accuracy 0.78828


 37%|███▋      | 732/2000 [18:16:47<6:55:41, 19.67s/it]

Training_loss 0.55964,   Accuracy 0.78792


 37%|███▋      | 733/2000 [18:17:07<6:53:01, 19.56s/it]

Training_loss 0.55950,   Accuracy 0.78799


 37%|███▋      | 734/2000 [18:17:26<6:51:23, 19.50s/it]

Training_loss 0.55942,   Accuracy 0.78799


 37%|███▋      | 735/2000 [18:17:46<6:52:36, 19.57s/it]

Training_loss 0.55952,   Accuracy 0.78792


 37%|███▋      | 736/2000 [18:18:06<6:55:14, 19.71s/it]

Training_loss 0.55931,   Accuracy 0.78796


 37%|███▋      | 737/2000 [18:18:25<6:54:09, 19.68s/it]

Training_loss 0.55936,   Accuracy 0.78818


 37%|███▋      | 738/2000 [18:18:45<6:53:56, 19.68s/it]

Training_loss 0.55946,   Accuracy 0.78844


 37%|███▋      | 739/2000 [18:19:05<6:53:52, 19.69s/it]

Training_loss 0.55946,   Accuracy 0.78906


 37%|███▋      | 740/2000 [18:19:25<6:57:22, 19.88s/it]

Training_loss 0.55925,   Accuracy 0.78792


 37%|███▋      | 741/2000 [18:19:47<7:11:00, 20.54s/it]

Training_loss 0.55919,   Accuracy 0.78796


 37%|███▋      | 742/2000 [18:20:12<7:37:08, 21.80s/it]

Training_loss 0.55900,   Accuracy 0.78796


 37%|███▋      | 743/2000 [18:20:32<7:28:35, 21.41s/it]

Training_loss 0.55905,   Accuracy 0.78818


 37%|███▋      | 744/2000 [18:20:53<7:21:19, 21.08s/it]

Training_loss 0.55917,   Accuracy 0.78909


 37%|███▋      | 745/2000 [18:21:13<7:16:58, 20.89s/it]

Training_loss 0.55908,   Accuracy 0.78918


 37%|███▋      | 746/2000 [18:21:35<7:23:09, 21.20s/it]

Training_loss 0.55897,   Accuracy 0.78918


 37%|███▋      | 747/2000 [18:21:56<7:17:58, 20.97s/it]

Training_loss 0.55902,   Accuracy 0.78915


 37%|███▋      | 748/2000 [18:22:16<7:14:04, 20.80s/it]

Training_loss 0.55890,   Accuracy 0.78906


 37%|███▋      | 749/2000 [18:22:37<7:12:23, 20.74s/it]

Training_loss 0.55868,   Accuracy 0.78838


 38%|███▊      | 750/2000 [18:22:57<7:13:11, 20.79s/it]

Training_loss 0.55876,   Accuracy 0.78899


 38%|███▊      | 751/2000 [18:23:18<7:10:51, 20.70s/it]

Training_loss 0.55869,   Accuracy 0.78896


 38%|███▊      | 752/2000 [18:23:39<7:11:28, 20.74s/it]

Training_loss 0.55863,   Accuracy 0.78902


 38%|███▊      | 753/2000 [18:24:00<7:11:36, 20.77s/it]

Training_loss 0.55887,   Accuracy 0.78915


 38%|███▊      | 754/2000 [18:24:20<7:09:34, 20.69s/it]

Training_loss 0.55873,   Accuracy 0.78915


 38%|███▊      | 755/2000 [18:24:41<7:12:31, 20.84s/it]

Training_loss 0.55871,   Accuracy 0.78915


 38%|███▊      | 756/2000 [18:25:02<7:12:30, 20.86s/it]

Training_loss 0.55868,   Accuracy 0.78915


 38%|███▊      | 757/2000 [18:25:23<7:11:39, 20.84s/it]

Training_loss 0.55850,   Accuracy 0.78922


 38%|███▊      | 758/2000 [18:25:43<7:08:08, 20.68s/it]

Training_loss 0.55857,   Accuracy 0.78918


 38%|███▊      | 759/2000 [18:26:03<7:04:47, 20.54s/it]

Training_loss 0.55859,   Accuracy 0.78941


 38%|███▊      | 760/2000 [18:26:24<7:03:36, 20.50s/it]

Training_loss 0.55851,   Accuracy 0.78938


 38%|███▊      | 761/2000 [18:26:44<7:03:47, 20.52s/it]

Training_loss 0.55859,   Accuracy 0.78938


 38%|███▊      | 762/2000 [18:27:05<7:03:38, 20.53s/it]

Training_loss 0.55843,   Accuracy 0.78938


 38%|███▊      | 763/2000 [18:27:26<7:08:18, 20.77s/it]

Training_loss 0.55850,   Accuracy 0.78941


 38%|███▊      | 764/2000 [18:27:47<7:05:58, 20.68s/it]

Training_loss 0.55860,   Accuracy 0.79128


 38%|███▊      | 765/2000 [18:28:08<7:05:59, 20.70s/it]

Training_loss 0.55871,   Accuracy 0.79151


 38%|███▊      | 766/2000 [18:28:28<7:02:00, 20.52s/it]

Training_loss 0.55856,   Accuracy 0.79145


 38%|███▊      | 767/2000 [18:28:49<7:03:49, 20.62s/it]

Training_loss 0.55868,   Accuracy 0.79148


 38%|███▊      | 768/2000 [18:29:09<7:00:13, 20.47s/it]

Training_loss 0.55843,   Accuracy 0.79128


 38%|███▊      | 769/2000 [18:29:33<7:22:00, 21.54s/it]

Training_loss 0.55847,   Accuracy 0.79125


 38%|███▊      | 770/2000 [18:29:56<7:31:47, 22.04s/it]

Training_loss 0.55844,   Accuracy 0.79145


 39%|███▊      | 771/2000 [18:30:20<7:43:11, 22.61s/it]

Training_loss 0.55836,   Accuracy 0.79145


 39%|███▊      | 772/2000 [18:30:43<7:43:29, 22.65s/it]

Training_loss 0.55828,   Accuracy 0.79135


 39%|███▊      | 773/2000 [18:31:06<7:45:28, 22.76s/it]

Training_loss 0.55820,   Accuracy 0.79154


 39%|███▊      | 774/2000 [18:31:27<7:39:37, 22.49s/it]

Training_loss 0.55823,   Accuracy 0.79151


 39%|███▉      | 775/2000 [18:31:50<7:37:56, 22.43s/it]

Training_loss 0.55807,   Accuracy 0.79154


 39%|███▉      | 776/2000 [18:32:12<7:34:39, 22.29s/it]

Training_loss 0.55809,   Accuracy 0.79177


 39%|███▉      | 777/2000 [18:32:38<7:57:00, 23.40s/it]

Training_loss 0.55806,   Accuracy 0.79187


 39%|███▉      | 778/2000 [18:32:58<7:39:16, 22.55s/it]

Training_loss 0.55796,   Accuracy 0.79180


 39%|███▉      | 779/2000 [18:33:24<7:58:25, 23.51s/it]

Training_loss 0.55785,   Accuracy 0.79151


 39%|███▉      | 780/2000 [18:33:47<7:56:06, 23.41s/it]

Training_loss 0.55763,   Accuracy 0.79135


 39%|███▉      | 781/2000 [18:34:08<7:37:21, 22.51s/it]

Training_loss 0.55752,   Accuracy 0.79132


 39%|███▉      | 782/2000 [18:34:29<7:29:20, 22.14s/it]

Training_loss 0.55741,   Accuracy 0.79119


 39%|███▉      | 783/2000 [18:34:51<7:26:22, 22.01s/it]

Training_loss 0.55754,   Accuracy 0.79151


 39%|███▉      | 784/2000 [18:35:12<7:19:37, 21.69s/it]

Training_loss 0.55756,   Accuracy 0.79151


 39%|███▉      | 785/2000 [18:35:31<7:04:49, 20.98s/it]

Training_loss 0.55743,   Accuracy 0.79151


 39%|███▉      | 786/2000 [18:35:51<6:57:04, 20.61s/it]

Training_loss 0.55756,   Accuracy 0.79177


 39%|███▉      | 787/2000 [18:36:09<6:46:15, 20.09s/it]

Training_loss 0.55734,   Accuracy 0.79151


 39%|███▉      | 788/2000 [18:36:29<6:41:55, 19.90s/it]

Training_loss 0.55750,   Accuracy 0.79209


 39%|███▉      | 789/2000 [18:36:49<6:45:15, 20.08s/it]

Training_loss 0.55729,   Accuracy 0.79154


 40%|███▉      | 790/2000 [18:37:09<6:42:03, 19.94s/it]

Training_loss 0.55744,   Accuracy 0.79180


 40%|███▉      | 791/2000 [18:37:29<6:44:34, 20.08s/it]

Training_loss 0.55747,   Accuracy 0.79193


 40%|███▉      | 792/2000 [18:37:49<6:42:44, 20.00s/it]

Training_loss 0.55752,   Accuracy 0.79209


 40%|███▉      | 793/2000 [18:38:09<6:40:36, 19.91s/it]

Training_loss 0.55757,   Accuracy 0.79216


 40%|███▉      | 794/2000 [18:38:28<6:36:27, 19.72s/it]

Training_loss 0.55746,   Accuracy 0.79212


 40%|███▉      | 795/2000 [18:38:48<6:35:44, 19.71s/it]

Training_loss 0.55734,   Accuracy 0.79216


 40%|███▉      | 796/2000 [18:39:08<6:37:20, 19.80s/it]

Training_loss 0.55731,   Accuracy 0.79209


 40%|███▉      | 797/2000 [18:39:29<6:41:36, 20.03s/it]

Training_loss 0.55745,   Accuracy 0.79209


 40%|███▉      | 798/2000 [18:39:48<6:40:53, 20.01s/it]

Training_loss 0.55744,   Accuracy 0.79212


 40%|███▉      | 799/2000 [18:40:08<6:37:03, 19.84s/it]

Training_loss 0.55735,   Accuracy 0.79209


 40%|████      | 800/2000 [18:40:28<6:39:55, 20.00s/it]

Training_loss 0.55730,   Accuracy 0.79222


 40%|████      | 801/2000 [18:40:50<6:52:15, 20.63s/it]

Training_loss 0.55720,   Accuracy 0.79238


 40%|████      | 802/2000 [18:41:11<6:53:11, 20.69s/it]

Training_loss 0.55721,   Accuracy 0.79235


 40%|████      | 803/2000 [18:41:32<6:53:54, 20.75s/it]

Training_loss 0.55733,   Accuracy 0.79222


 40%|████      | 804/2000 [18:41:55<7:05:02, 21.32s/it]

Training_loss 0.55746,   Accuracy 0.79471


 40%|████      | 805/2000 [18:42:15<6:58:05, 20.99s/it]

Training_loss 0.55729,   Accuracy 0.79293


 40%|████      | 806/2000 [18:42:35<6:53:57, 20.80s/it]

Training_loss 0.55737,   Accuracy 0.79267


 40%|████      | 807/2000 [18:42:59<7:08:03, 21.53s/it]

Training_loss 0.55726,   Accuracy 0.79222


 40%|████      | 808/2000 [18:43:18<6:57:39, 21.02s/it]

Training_loss 0.55723,   Accuracy 0.79222


 40%|████      | 809/2000 [18:43:42<7:13:14, 21.83s/it]

Training_loss 0.55733,   Accuracy 0.79277


 40%|████      | 810/2000 [18:44:03<7:07:18, 21.55s/it]

Training_loss 0.55733,   Accuracy 0.79277


 41%|████      | 811/2000 [18:44:25<7:08:51, 21.64s/it]

Training_loss 0.55736,   Accuracy 0.79481


 41%|████      | 812/2000 [18:44:46<7:06:23, 21.54s/it]

Training_loss 0.55734,   Accuracy 0.79474


 41%|████      | 813/2000 [18:45:08<7:06:11, 21.54s/it]

Training_loss 0.55740,   Accuracy 0.79435


 41%|████      | 814/2000 [18:45:29<7:05:55, 21.55s/it]

Training_loss 0.55709,   Accuracy 0.79451


 41%|████      | 815/2000 [18:45:49<6:54:13, 20.97s/it]

Training_loss 0.55722,   Accuracy 0.79422


 41%|████      | 816/2000 [18:46:08<6:45:13, 20.54s/it]

Training_loss 0.55742,   Accuracy 0.79471


 41%|████      | 817/2000 [18:46:28<6:37:06, 20.14s/it]

Training_loss 0.55742,   Accuracy 0.79487


 41%|████      | 818/2000 [18:46:48<6:36:50, 20.14s/it]

Training_loss 0.55725,   Accuracy 0.79484


 41%|████      | 819/2000 [18:47:09<6:39:59, 20.32s/it]

Training_loss 0.55716,   Accuracy 0.79474


 41%|████      | 820/2000 [18:47:28<6:36:15, 20.15s/it]

Training_loss 0.55697,   Accuracy 0.79464


 41%|████      | 821/2000 [18:47:48<6:34:35, 20.08s/it]

Training_loss 0.55687,   Accuracy 0.79461


 41%|████      | 822/2000 [18:48:08<6:29:47, 19.85s/it]

Training_loss 0.55681,   Accuracy 0.79487


 41%|████      | 823/2000 [18:48:27<6:28:24, 19.80s/it]

Training_loss 0.55677,   Accuracy 0.79490


 41%|████      | 824/2000 [18:48:48<6:31:51, 19.99s/it]

Training_loss 0.55680,   Accuracy 0.79474


 41%|████▏     | 825/2000 [18:49:07<6:28:29, 19.84s/it]

Training_loss 0.55658,   Accuracy 0.79481


 41%|████▏     | 826/2000 [18:49:30<6:46:18, 20.77s/it]

Training_loss 0.55669,   Accuracy 0.79451


 41%|████▏     | 827/2000 [18:49:52<6:51:06, 21.03s/it]

Training_loss 0.55649,   Accuracy 0.79493


 41%|████▏     | 828/2000 [18:50:12<6:43:50, 20.67s/it]

Training_loss 0.55647,   Accuracy 0.79636


 41%|████▏     | 829/2000 [18:50:31<6:36:21, 20.31s/it]

Training_loss 0.55662,   Accuracy 0.79619


 42%|████▏     | 830/2000 [18:50:51<6:33:06, 20.16s/it]

Training_loss 0.55641,   Accuracy 0.79668


 42%|████▏     | 831/2000 [18:51:11<6:34:37, 20.25s/it]

Training_loss 0.55624,   Accuracy 0.79632


 42%|████▏     | 832/2000 [18:51:32<6:34:57, 20.29s/it]

Training_loss 0.55612,   Accuracy 0.79636


 42%|████▏     | 833/2000 [18:51:52<6:33:55, 20.25s/it]

Training_loss 0.55605,   Accuracy 0.79497


 42%|████▏     | 834/2000 [18:52:13<6:41:13, 20.65s/it]

Training_loss 0.55616,   Accuracy 0.79636


 42%|████▏     | 835/2000 [18:52:33<6:33:44, 20.28s/it]

Training_loss 0.55609,   Accuracy 0.79497


 42%|████▏     | 836/2000 [18:52:50<6:14:56, 19.33s/it]

Training_loss 0.55609,   Accuracy 0.79619


 42%|████▏     | 837/2000 [18:53:05<5:47:10, 17.91s/it]

Training_loss 0.55615,   Accuracy 0.79636


 42%|████▏     | 838/2000 [18:53:19<5:26:22, 16.85s/it]

Training_loss 0.55612,   Accuracy 0.79629


 42%|████▏     | 839/2000 [18:53:32<5:02:53, 15.65s/it]

Training_loss 0.55617,   Accuracy 0.79655


 42%|████▏     | 840/2000 [18:53:45<4:49:31, 14.98s/it]

Training_loss 0.55612,   Accuracy 0.79655


 42%|████▏     | 841/2000 [18:53:58<4:38:00, 14.39s/it]

Training_loss 0.55594,   Accuracy 0.79626


 42%|████▏     | 842/2000 [18:54:12<4:31:43, 14.08s/it]

Training_loss 0.55584,   Accuracy 0.79629


 42%|████▏     | 843/2000 [18:54:25<4:26:24, 13.82s/it]

Training_loss 0.55576,   Accuracy 0.79629


 42%|████▏     | 844/2000 [18:54:39<4:27:32, 13.89s/it]

Training_loss 0.55553,   Accuracy 0.79632


 42%|████▏     | 845/2000 [18:54:52<4:21:47, 13.60s/it]

Training_loss 0.55554,   Accuracy 0.79678


 42%|████▏     | 846/2000 [18:55:07<4:31:29, 14.12s/it]

Training_loss 0.55559,   Accuracy 0.79639


 42%|████▏     | 847/2000 [18:55:20<4:27:09, 13.90s/it]

Training_loss 0.55561,   Accuracy 0.79629


 42%|████▏     | 848/2000 [18:55:33<4:22:03, 13.65s/it]

Training_loss 0.55558,   Accuracy 0.79626


 42%|████▏     | 849/2000 [18:55:47<4:19:18, 13.52s/it]

Training_loss 0.55557,   Accuracy 0.79639


 42%|████▎     | 850/2000 [18:56:02<4:28:16, 14.00s/it]

Training_loss 0.55555,   Accuracy 0.79642


 43%|████▎     | 851/2000 [18:56:16<4:31:19, 14.17s/it]

Training_loss 0.55546,   Accuracy 0.79629


 43%|████▎     | 852/2000 [18:56:30<4:28:01, 14.01s/it]

Training_loss 0.55557,   Accuracy 0.79642


 43%|████▎     | 853/2000 [18:56:43<4:24:01, 13.81s/it]

Training_loss 0.55553,   Accuracy 0.79642


 43%|████▎     | 854/2000 [18:56:57<4:20:34, 13.64s/it]

Training_loss 0.55539,   Accuracy 0.79642


 43%|████▎     | 855/2000 [18:57:10<4:19:30, 13.60s/it]

Training_loss 0.55521,   Accuracy 0.79642


 43%|████▎     | 856/2000 [18:57:23<4:17:07, 13.49s/it]

Training_loss 0.55527,   Accuracy 0.79642


 43%|████▎     | 857/2000 [18:57:37<4:20:12, 13.66s/it]

Training_loss 0.55531,   Accuracy 0.79626


 43%|████▎     | 858/2000 [18:57:55<4:41:54, 14.81s/it]

Training_loss 0.55531,   Accuracy 0.79632


 43%|████▎     | 859/2000 [18:58:12<4:56:35, 15.60s/it]

Training_loss 0.55527,   Accuracy 0.79616


 43%|████▎     | 860/2000 [18:58:38<5:53:54, 18.63s/it]

Training_loss 0.55539,   Accuracy 0.79703


 43%|████▎     | 861/2000 [18:58:57<5:56:11, 18.76s/it]

Training_loss 0.55538,   Accuracy 0.79733


 43%|████▎     | 862/2000 [18:59:25<6:50:20, 21.63s/it]

Training_loss 0.55524,   Accuracy 0.79713


 43%|████▎     | 863/2000 [18:59:48<6:55:41, 21.94s/it]

Training_loss 0.55521,   Accuracy 0.79739


 43%|████▎     | 864/2000 [19:00:03<6:16:31, 19.89s/it]

Training_loss 0.55507,   Accuracy 0.79707


 43%|████▎     | 865/2000 [19:00:20<5:59:13, 18.99s/it]

Training_loss 0.55507,   Accuracy 0.79707


 43%|████▎     | 866/2000 [19:00:36<5:43:29, 18.17s/it]

Training_loss 0.55512,   Accuracy 0.79729


 43%|████▎     | 867/2000 [19:00:50<5:15:00, 16.68s/it]

Training_loss 0.55501,   Accuracy 0.79713


 43%|████▎     | 868/2000 [19:01:03<4:54:43, 15.62s/it]

Training_loss 0.55480,   Accuracy 0.79681


 43%|████▎     | 869/2000 [19:01:16<4:40:09, 14.86s/it]

Training_loss 0.55473,   Accuracy 0.79707


 44%|████▎     | 870/2000 [19:01:29<4:31:52, 14.44s/it]

Training_loss 0.55453,   Accuracy 0.79645


 44%|████▎     | 871/2000 [19:01:43<4:27:08, 14.20s/it]

Training_loss 0.55448,   Accuracy 0.79655


 44%|████▎     | 872/2000 [19:02:03<5:02:44, 16.10s/it]

Training_loss 0.55441,   Accuracy 0.79652


 44%|████▎     | 873/2000 [19:02:23<5:19:16, 17.00s/it]

Training_loss 0.55429,   Accuracy 0.79655


 44%|████▎     | 874/2000 [19:02:42<5:31:48, 17.68s/it]

Training_loss 0.55436,   Accuracy 0.79636


 44%|████▍     | 875/2000 [19:02:56<5:13:52, 16.74s/it]

Training_loss 0.55421,   Accuracy 0.79655


 44%|████▍     | 876/2000 [19:03:15<5:24:47, 17.34s/it]

Training_loss 0.55396,   Accuracy 0.79652


 44%|████▍     | 877/2000 [19:03:36<5:47:16, 18.55s/it]

Training_loss 0.55399,   Accuracy 0.79652


 44%|████▍     | 878/2000 [19:03:56<5:51:02, 18.77s/it]

Training_loss 0.55410,   Accuracy 0.79639


 44%|████▍     | 879/2000 [19:04:15<5:51:37, 18.82s/it]

Training_loss 0.55407,   Accuracy 0.79639


 44%|████▍     | 880/2000 [19:04:34<5:55:09, 19.03s/it]

Training_loss 0.55403,   Accuracy 0.79642


 44%|████▍     | 881/2000 [19:05:00<6:31:41, 21.00s/it]

Training_loss 0.55399,   Accuracy 0.79642


 44%|████▍     | 882/2000 [19:05:22<6:37:02, 21.31s/it]

Training_loss 0.55399,   Accuracy 0.79619


 44%|████▍     | 883/2000 [19:05:44<6:41:44, 21.58s/it]

Training_loss 0.55410,   Accuracy 0.79642


 44%|████▍     | 884/2000 [19:06:07<6:50:28, 22.07s/it]

Training_loss 0.55408,   Accuracy 0.79713


 44%|████▍     | 885/2000 [19:06:29<6:48:29, 21.98s/it]

Training_loss 0.55417,   Accuracy 0.79700


 44%|████▍     | 886/2000 [19:06:59<7:32:16, 24.36s/it]

Training_loss 0.55396,   Accuracy 0.79723


 44%|████▍     | 887/2000 [19:07:26<7:45:02, 25.07s/it]

Training_loss 0.55396,   Accuracy 0.79723


 44%|████▍     | 888/2000 [19:07:46<7:20:29, 23.77s/it]

Training_loss 0.55395,   Accuracy 0.79700


 44%|████▍     | 889/2000 [19:08:08<7:07:28, 23.09s/it]

Training_loss 0.55403,   Accuracy 0.79700


 44%|████▍     | 890/2000 [19:08:28<6:50:21, 22.18s/it]

Training_loss 0.55402,   Accuracy 0.79700


 45%|████▍     | 891/2000 [19:08:50<6:47:14, 22.03s/it]

Training_loss 0.55397,   Accuracy 0.79697


 45%|████▍     | 892/2000 [19:09:09<6:34:38, 21.37s/it]

Training_loss 0.55382,   Accuracy 0.79697


 45%|████▍     | 893/2000 [19:09:30<6:30:35, 21.17s/it]

Training_loss 0.55392,   Accuracy 0.79665


 45%|████▍     | 894/2000 [19:09:51<6:29:13, 21.12s/it]

Training_loss 0.55368,   Accuracy 0.79697


 45%|████▍     | 895/2000 [19:10:11<6:20:44, 20.67s/it]

Training_loss 0.55366,   Accuracy 0.79697


 45%|████▍     | 896/2000 [19:10:30<6:14:04, 20.33s/it]

Training_loss 0.55370,   Accuracy 0.79710


 45%|████▍     | 897/2000 [19:10:51<6:13:27, 20.32s/it]

Training_loss 0.55372,   Accuracy 0.79687


 45%|████▍     | 898/2000 [19:11:14<6:28:45, 21.17s/it]

Training_loss 0.55354,   Accuracy 0.79697


 45%|████▍     | 899/2000 [19:11:46<7:27:59, 24.41s/it]

Training_loss 0.55346,   Accuracy 0.79629


 45%|████▌     | 900/2000 [19:12:16<7:59:26, 26.15s/it]

Training_loss 0.55356,   Accuracy 0.79707


 45%|████▌     | 901/2000 [19:12:45<8:16:46, 27.12s/it]

Training_loss 0.55347,   Accuracy 0.79629


 45%|████▌     | 902/2000 [19:13:16<8:37:56, 28.30s/it]

Training_loss 0.55345,   Accuracy 0.79629


 45%|████▌     | 903/2000 [19:13:47<8:51:16, 29.06s/it]

Training_loss 0.55339,   Accuracy 0.79629


 45%|████▌     | 904/2000 [19:14:17<8:53:03, 29.18s/it]

Training_loss 0.55334,   Accuracy 0.79645


 45%|████▌     | 905/2000 [19:14:46<8:53:49, 29.25s/it]

Training_loss 0.55320,   Accuracy 0.79619


 45%|████▌     | 906/2000 [19:15:15<8:49:52, 29.06s/it]

Training_loss 0.55329,   Accuracy 0.79629


 45%|████▌     | 907/2000 [19:15:43<8:44:38, 28.80s/it]

Training_loss 0.55338,   Accuracy 0.79694


 45%|████▌     | 908/2000 [19:16:13<8:48:56, 29.06s/it]

Training_loss 0.55337,   Accuracy 0.79687


 45%|████▌     | 909/2000 [19:16:41<8:46:23, 28.95s/it]

Training_loss 0.55337,   Accuracy 0.79684


 46%|████▌     | 910/2000 [19:17:10<8:46:00, 28.95s/it]

Training_loss 0.55352,   Accuracy 0.79684


 46%|████▌     | 911/2000 [19:17:40<8:49:24, 29.17s/it]

Training_loss 0.55336,   Accuracy 0.79691


 46%|████▌     | 912/2000 [19:18:09<8:49:08, 29.18s/it]

Training_loss 0.55356,   Accuracy 0.79636


 46%|████▌     | 913/2000 [19:18:38<8:45:23, 29.00s/it]

Training_loss 0.55361,   Accuracy 0.79668


 46%|████▌     | 914/2000 [19:19:06<8:43:05, 28.90s/it]

Training_loss 0.55352,   Accuracy 0.79639


 46%|████▌     | 915/2000 [19:19:36<8:46:42, 29.13s/it]

Training_loss 0.55346,   Accuracy 0.79639


 46%|████▌     | 916/2000 [19:20:05<8:44:33, 29.03s/it]

Training_loss 0.55337,   Accuracy 0.79681


 46%|████▌     | 917/2000 [19:20:34<8:46:19, 29.16s/it]

Training_loss 0.55343,   Accuracy 0.79691


 46%|████▌     | 918/2000 [19:21:04<8:48:35, 29.31s/it]

Training_loss 0.55343,   Accuracy 0.79691


 46%|████▌     | 919/2000 [19:21:34<8:49:48, 29.41s/it]

Training_loss 0.55358,   Accuracy 0.79829


 46%|████▌     | 920/2000 [19:22:03<8:51:31, 29.53s/it]

Training_loss 0.55354,   Accuracy 0.79849


 46%|████▌     | 921/2000 [19:22:32<8:46:52, 29.30s/it]

Training_loss 0.55362,   Accuracy 0.79859


 46%|████▌     | 922/2000 [19:23:02<8:49:15, 29.46s/it]

Training_loss 0.55372,   Accuracy 0.79859


 46%|████▌     | 923/2000 [19:23:31<8:45:15, 29.26s/it]

Training_loss 0.55372,   Accuracy 0.79859


 46%|████▌     | 924/2000 [19:24:00<8:43:00, 29.16s/it]

Training_loss 0.55373,   Accuracy 0.79875


 46%|████▋     | 925/2000 [19:24:26<8:26:56, 28.29s/it]

Training_loss 0.55378,   Accuracy 0.79849


 46%|████▋     | 926/2000 [19:24:52<8:12:48, 27.53s/it]

Training_loss 0.55355,   Accuracy 0.79839


 46%|████▋     | 927/2000 [19:25:19<8:09:20, 27.36s/it]

Training_loss 0.55349,   Accuracy 0.79829


 46%|████▋     | 928/2000 [19:25:44<7:58:44, 26.79s/it]

Training_loss 0.55338,   Accuracy 0.79846


 46%|████▋     | 929/2000 [19:26:13<8:08:43, 27.38s/it]

Training_loss 0.55329,   Accuracy 0.79849


 46%|████▋     | 930/2000 [19:26:43<8:23:23, 28.23s/it]

Training_loss 0.55342,   Accuracy 0.79836


 47%|████▋     | 931/2000 [19:27:11<8:19:46, 28.05s/it]

Training_loss 0.55346,   Accuracy 0.79836


 47%|████▋     | 932/2000 [19:27:37<8:09:19, 27.49s/it]

Training_loss 0.55338,   Accuracy 0.79804


 47%|████▋     | 933/2000 [19:28:05<8:09:56, 27.55s/it]

Training_loss 0.55334,   Accuracy 0.79804


 47%|████▋     | 934/2000 [19:28:31<8:05:26, 27.32s/it]

Training_loss 0.55346,   Accuracy 0.79797


 47%|████▋     | 935/2000 [19:28:57<7:57:33, 26.90s/it]

Training_loss 0.55333,   Accuracy 0.79691


 47%|████▋     | 936/2000 [19:29:24<7:55:52, 26.84s/it]

Training_loss 0.55335,   Accuracy 0.79687


 47%|████▋     | 937/2000 [19:29:50<7:50:11, 26.54s/it]

Training_loss 0.55321,   Accuracy 0.79691


 47%|████▋     | 938/2000 [19:30:18<7:57:25, 26.97s/it]

Training_loss 0.55323,   Accuracy 0.79694


 47%|████▋     | 939/2000 [19:30:44<7:53:38, 26.78s/it]

Training_loss 0.55309,   Accuracy 0.79687


 47%|████▋     | 940/2000 [19:31:10<7:45:37, 26.36s/it]

Training_loss 0.55317,   Accuracy 0.79800


 47%|████▋     | 941/2000 [19:31:36<7:48:07, 26.52s/it]

Training_loss 0.55334,   Accuracy 0.79817


 47%|████▋     | 942/2000 [19:32:03<7:50:03, 26.66s/it]

Training_loss 0.55343,   Accuracy 0.79823


 47%|████▋     | 943/2000 [19:32:30<7:48:29, 26.59s/it]

Training_loss 0.55349,   Accuracy 0.79823


 47%|████▋     | 944/2000 [19:32:59<8:00:40, 27.31s/it]

Training_loss 0.55361,   Accuracy 0.79823


 47%|████▋     | 945/2000 [19:33:25<7:54:02, 26.96s/it]

Training_loss 0.55354,   Accuracy 0.79817


 47%|████▋     | 946/2000 [19:33:53<8:00:29, 27.35s/it]

Training_loss 0.55343,   Accuracy 0.79817


 47%|████▋     | 947/2000 [19:34:21<8:02:22, 27.49s/it]

Training_loss 0.55358,   Accuracy 0.79810


 47%|████▋     | 948/2000 [19:34:48<7:57:42, 27.25s/it]

Training_loss 0.55350,   Accuracy 0.79810


 47%|████▋     | 949/2000 [19:35:15<7:56:52, 27.22s/it]

Training_loss 0.55363,   Accuracy 0.79820


 48%|████▊     | 950/2000 [19:35:41<7:48:14, 26.76s/it]

Training_loss 0.55357,   Accuracy 0.79826


 48%|████▊     | 951/2000 [19:36:07<7:43:48, 26.53s/it]

Training_loss 0.55352,   Accuracy 0.79826


 48%|████▊     | 952/2000 [19:36:32<7:36:44, 26.15s/it]

Training_loss 0.55334,   Accuracy 0.79820


 48%|████▊     | 953/2000 [19:36:59<7:39:28, 26.33s/it]

Training_loss 0.55331,   Accuracy 0.79820


 48%|████▊     | 954/2000 [19:37:24<7:33:45, 26.03s/it]

Training_loss 0.55335,   Accuracy 0.79810


 48%|████▊     | 955/2000 [19:37:49<7:29:56, 25.83s/it]

Training_loss 0.55343,   Accuracy 0.79820


 48%|████▊     | 956/2000 [19:38:16<7:31:35, 25.95s/it]

Training_loss 0.55354,   Accuracy 0.79823


 48%|████▊     | 957/2000 [19:38:40<7:20:57, 25.37s/it]

Training_loss 0.55354,   Accuracy 0.79836


 48%|████▊     | 958/2000 [19:39:06<7:27:39, 25.78s/it]

Training_loss 0.55340,   Accuracy 0.79813


 48%|████▊     | 959/2000 [19:39:32<7:24:39, 25.63s/it]

Training_loss 0.55348,   Accuracy 0.79839


 48%|████▊     | 960/2000 [19:39:58<7:28:33, 25.88s/it]

Training_loss 0.55347,   Accuracy 0.79839


 48%|████▊     | 961/2000 [19:40:24<7:27:50, 25.86s/it]

Training_loss 0.55324,   Accuracy 0.79813


 48%|████▊     | 962/2000 [19:40:50<7:26:25, 25.80s/it]

Training_loss 0.55332,   Accuracy 0.79842


 48%|████▊     | 963/2000 [19:41:16<7:31:38, 26.13s/it]

Training_loss 0.55321,   Accuracy 0.79826


 48%|████▊     | 964/2000 [19:41:42<7:27:23, 25.91s/it]

Training_loss 0.55316,   Accuracy 0.79826


 48%|████▊     | 965/2000 [19:42:07<7:23:06, 25.69s/it]

Training_loss 0.55320,   Accuracy 0.79842


 48%|████▊     | 966/2000 [19:42:34<7:30:50, 26.16s/it]

Training_loss 0.55308,   Accuracy 0.79810


 48%|████▊     | 967/2000 [19:43:00<7:29:29, 26.11s/it]

Training_loss 0.55307,   Accuracy 0.79810


 48%|████▊     | 968/2000 [19:43:25<7:23:17, 25.77s/it]

Training_loss 0.55306,   Accuracy 0.79823


 48%|████▊     | 969/2000 [19:43:51<7:24:16, 25.86s/it]

Training_loss 0.55293,   Accuracy 0.79784


 48%|████▊     | 970/2000 [19:44:18<7:25:52, 25.97s/it]

Training_loss 0.55296,   Accuracy 0.79797


 49%|████▊     | 971/2000 [19:44:43<7:25:08, 25.96s/it]

Training_loss 0.55278,   Accuracy 0.79794


 49%|████▊     | 972/2000 [19:45:11<7:31:27, 26.35s/it]

Training_loss 0.55282,   Accuracy 0.79791


 49%|████▊     | 973/2000 [19:45:38<7:34:13, 26.54s/it]

Training_loss 0.55301,   Accuracy 0.79839


 49%|████▊     | 974/2000 [19:46:03<7:27:26, 26.17s/it]

Training_loss 0.55307,   Accuracy 0.79871


 49%|████▉     | 975/2000 [19:46:30<7:30:31, 26.37s/it]

Training_loss 0.55300,   Accuracy 0.79842


 49%|████▉     | 976/2000 [19:46:55<7:24:30, 26.05s/it]

Training_loss 0.55323,   Accuracy 0.79881


 49%|████▉     | 977/2000 [19:47:22<7:29:09, 26.34s/it]

Training_loss 0.55318,   Accuracy 0.79881


 49%|████▉     | 978/2000 [19:47:47<7:23:09, 26.02s/it]

Training_loss 0.55327,   Accuracy 0.80026


 49%|████▉     | 979/2000 [19:48:13<7:21:38, 25.95s/it]

Training_loss 0.55332,   Accuracy 0.80030


 49%|████▉     | 980/2000 [19:48:41<7:28:58, 26.41s/it]

Training_loss 0.55346,   Accuracy 0.80043


 49%|████▉     | 981/2000 [19:49:08<7:30:37, 26.53s/it]

Training_loss 0.55348,   Accuracy 0.80014


 49%|████▉     | 982/2000 [19:49:33<7:25:40, 26.27s/it]

Training_loss 0.55358,   Accuracy 0.80046


 49%|████▉     | 983/2000 [19:49:59<7:22:41, 26.12s/it]

Training_loss 0.55360,   Accuracy 0.80043


 49%|████▉     | 984/2000 [19:50:24<7:18:23, 25.89s/it]

Training_loss 0.55352,   Accuracy 0.80036


 49%|████▉     | 985/2000 [19:50:49<7:14:28, 25.68s/it]

Training_loss 0.55361,   Accuracy 0.80123


 49%|████▉     | 986/2000 [19:51:16<7:20:10, 26.05s/it]

Training_loss 0.55372,   Accuracy 0.80140


 49%|████▉     | 987/2000 [19:51:42<7:17:23, 25.91s/it]

Training_loss 0.55362,   Accuracy 0.80104


 49%|████▉     | 988/2000 [19:52:08<7:15:40, 25.83s/it]

Training_loss 0.55379,   Accuracy 0.80159


 49%|████▉     | 989/2000 [19:52:33<7:11:59, 25.64s/it]

Training_loss 0.55386,   Accuracy 0.80169


 50%|████▉     | 990/2000 [19:53:00<7:18:25, 26.05s/it]

Training_loss 0.55377,   Accuracy 0.80159


 50%|████▉     | 991/2000 [19:53:26<7:16:37, 25.96s/it]

Training_loss 0.55377,   Accuracy 0.80165


 50%|████▉     | 992/2000 [19:53:52<7:17:56, 26.07s/it]

Training_loss 0.55381,   Accuracy 0.80169


 50%|████▉     | 993/2000 [19:54:19<7:23:25, 26.42s/it]

Training_loss 0.55376,   Accuracy 0.80182


 50%|████▉     | 994/2000 [19:54:46<7:22:46, 26.41s/it]

Training_loss 0.55359,   Accuracy 0.80169


 50%|████▉     | 995/2000 [19:55:12<7:21:15, 26.34s/it]

Training_loss 0.55360,   Accuracy 0.80162


 50%|████▉     | 996/2000 [19:55:39<7:25:21, 26.61s/it]

Training_loss 0.55353,   Accuracy 0.80165


 50%|████▉     | 997/2000 [19:56:05<7:20:05, 26.33s/it]

Training_loss 0.55343,   Accuracy 0.80165


 50%|████▉     | 998/2000 [19:56:30<7:12:58, 25.93s/it]

Training_loss 0.55370,   Accuracy 0.80330


 50%|████▉     | 999/2000 [19:56:55<7:08:45, 25.70s/it]

Training_loss 0.55376,   Accuracy 0.80340


 50%|█████     | 1000/2000 [19:57:19<7:00:14, 25.21s/it]

Training_loss 0.55366,   Accuracy 0.80340


 50%|█████     | 1001/2000 [19:57:46<7:09:55, 25.82s/it]

Training_loss 0.55378,   Accuracy 0.80330


 50%|█████     | 1002/2000 [19:58:12<7:08:59, 25.79s/it]

Training_loss 0.55369,   Accuracy 0.80330


 50%|█████     | 1003/2000 [19:58:37<7:07:56, 25.75s/it]

Training_loss 0.55368,   Accuracy 0.80330


 50%|█████     | 1004/2000 [19:59:05<7:14:17, 26.16s/it]

Training_loss 0.55354,   Accuracy 0.80346


 50%|█████     | 1005/2000 [19:59:32<7:21:16, 26.61s/it]

Training_loss 0.55349,   Accuracy 0.80330


 50%|█████     | 1006/2000 [19:59:58<7:16:47, 26.37s/it]

Training_loss 0.55365,   Accuracy 0.80330


 50%|█████     | 1007/2000 [20:00:21<6:59:29, 25.35s/it]

Training_loss 0.55361,   Accuracy 0.80330


 50%|█████     | 1008/2000 [20:00:47<7:01:14, 25.48s/it]

Training_loss 0.55347,   Accuracy 0.80337


 50%|█████     | 1009/2000 [20:01:13<7:03:45, 25.66s/it]

Training_loss 0.55347,   Accuracy 0.80466


 50%|█████     | 1010/2000 [20:01:39<7:07:54, 25.93s/it]

Training_loss 0.55352,   Accuracy 0.80385


 51%|█████     | 1011/2000 [20:02:05<7:06:31, 25.88s/it]

Training_loss 0.55352,   Accuracy 0.80388


 51%|█████     | 1012/2000 [20:02:31<7:05:57, 25.87s/it]

Training_loss 0.55353,   Accuracy 0.80388


 51%|█████     | 1013/2000 [20:02:59<7:16:11, 26.52s/it]

Training_loss 0.55336,   Accuracy 0.80446


 51%|█████     | 1014/2000 [20:03:25<7:14:49, 26.46s/it]

Training_loss 0.55341,   Accuracy 0.80382


 51%|█████     | 1015/2000 [20:03:52<7:14:02, 26.44s/it]

Training_loss 0.55349,   Accuracy 0.80356


 51%|█████     | 1016/2000 [20:04:18<7:13:59, 26.46s/it]

Training_loss 0.55354,   Accuracy 0.80350


 51%|█████     | 1017/2000 [20:04:45<7:13:36, 26.47s/it]

Training_loss 0.55358,   Accuracy 0.80346


 51%|█████     | 1018/2000 [20:05:11<7:11:53, 26.39s/it]

Training_loss 0.55358,   Accuracy 0.80346


 51%|█████     | 1019/2000 [20:05:37<7:10:25, 26.33s/it]

Training_loss 0.55359,   Accuracy 0.80343


 51%|█████     | 1020/2000 [20:06:05<7:15:19, 26.65s/it]

Training_loss 0.55351,   Accuracy 0.80350


 51%|█████     | 1021/2000 [20:06:30<7:10:13, 26.37s/it]

Training_loss 0.55366,   Accuracy 0.80301


 51%|█████     | 1022/2000 [20:06:56<7:05:36, 26.11s/it]

Training_loss 0.55359,   Accuracy 0.80304


 51%|█████     | 1023/2000 [20:07:19<6:49:49, 25.17s/it]

Training_loss 0.55366,   Accuracy 0.80295


 51%|█████     | 1024/2000 [20:07:44<6:50:39, 25.25s/it]

Training_loss 0.55389,   Accuracy 0.80353


 51%|█████▏    | 1025/2000 [20:08:11<6:55:45, 25.59s/it]

Training_loss 0.55404,   Accuracy 0.80350


 51%|█████▏    | 1026/2000 [20:08:35<6:50:12, 25.27s/it]

Training_loss 0.55410,   Accuracy 0.80324


 51%|█████▏    | 1027/2000 [20:09:01<6:54:32, 25.56s/it]

Training_loss 0.55424,   Accuracy 0.80324


 51%|█████▏    | 1028/2000 [20:09:28<7:01:20, 26.01s/it]

Training_loss 0.55433,   Accuracy 0.80372


 51%|█████▏    | 1029/2000 [20:09:49<6:33:52, 24.34s/it]

Training_loss 0.55420,   Accuracy 0.80314


 52%|█████▏    | 1030/2000 [20:10:02<5:36:55, 20.84s/it]

Training_loss 0.55428,   Accuracy 0.80369


 52%|█████▏    | 1031/2000 [20:10:17<5:08:17, 19.09s/it]

Training_loss 0.55428,   Accuracy 0.80372


 52%|█████▏    | 1032/2000 [20:10:29<4:37:38, 17.21s/it]

Training_loss 0.55435,   Accuracy 0.80311


 52%|█████▏    | 1033/2000 [20:10:42<4:13:58, 15.76s/it]

Training_loss 0.55423,   Accuracy 0.80362


 52%|█████▏    | 1034/2000 [20:10:55<4:01:51, 15.02s/it]

Training_loss 0.55406,   Accuracy 0.80375


 52%|█████▏    | 1035/2000 [20:11:07<3:48:28, 14.21s/it]

Training_loss 0.55411,   Accuracy 0.80333


 52%|█████▏    | 1036/2000 [20:11:19<3:37:10, 13.52s/it]

Training_loss 0.55417,   Accuracy 0.80182


 52%|█████▏    | 1037/2000 [20:11:31<3:27:07, 12.90s/it]

Training_loss 0.55406,   Accuracy 0.80343


 52%|█████▏    | 1038/2000 [20:11:42<3:20:27, 12.50s/it]

Training_loss 0.55403,   Accuracy 0.80330


 52%|█████▏    | 1039/2000 [20:11:54<3:16:36, 12.27s/it]

Training_loss 0.55385,   Accuracy 0.80369


 52%|█████▏    | 1040/2000 [20:12:06<3:13:11, 12.07s/it]

Training_loss 0.55382,   Accuracy 0.80362


 52%|█████▏    | 1041/2000 [20:12:18<3:12:12, 12.03s/it]

Training_loss 0.55364,   Accuracy 0.80324


 52%|█████▏    | 1042/2000 [20:12:29<3:10:17, 11.92s/it]

Training_loss 0.55360,   Accuracy 0.80314


 52%|█████▏    | 1043/2000 [20:12:41<3:08:58, 11.85s/it]

Training_loss 0.55355,   Accuracy 0.80314


 52%|█████▏    | 1044/2000 [20:12:53<3:07:50, 11.79s/it]

Training_loss 0.55345,   Accuracy 0.80340


 52%|█████▏    | 1045/2000 [20:13:04<3:07:00, 11.75s/it]

Training_loss 0.55332,   Accuracy 0.80343


 52%|█████▏    | 1046/2000 [20:13:16<3:06:06, 11.71s/it]

Training_loss 0.55329,   Accuracy 0.80346


 52%|█████▏    | 1047/2000 [20:13:28<3:06:12, 11.72s/it]

Training_loss 0.55339,   Accuracy 0.80320


 52%|█████▏    | 1048/2000 [20:13:39<3:06:36, 11.76s/it]

Training_loss 0.55324,   Accuracy 0.80346


 52%|█████▏    | 1049/2000 [20:13:51<3:05:31, 11.71s/it]

Training_loss 0.55336,   Accuracy 0.80304


 52%|█████▎    | 1050/2000 [20:14:03<3:04:47, 11.67s/it]

Training_loss 0.55321,   Accuracy 0.80314


 53%|█████▎    | 1051/2000 [20:14:14<3:04:03, 11.64s/it]

Training_loss 0.55317,   Accuracy 0.80337


 53%|█████▎    | 1052/2000 [20:14:26<3:03:42, 11.63s/it]

Training_loss 0.55312,   Accuracy 0.80333


 53%|█████▎    | 1053/2000 [20:14:37<3:03:57, 11.66s/it]

Training_loss 0.55304,   Accuracy 0.80337


 53%|█████▎    | 1054/2000 [20:14:50<3:05:49, 11.79s/it]

Training_loss 0.55304,   Accuracy 0.80340


 53%|█████▎    | 1055/2000 [20:15:01<3:04:42, 11.73s/it]

Training_loss 0.55288,   Accuracy 0.80327


 53%|█████▎    | 1056/2000 [20:15:13<3:03:52, 11.69s/it]

Training_loss 0.55296,   Accuracy 0.80317


 53%|█████▎    | 1057/2000 [20:15:24<3:03:11, 11.66s/it]

Training_loss 0.55299,   Accuracy 0.80320


 53%|█████▎    | 1058/2000 [20:15:36<3:03:02, 11.66s/it]

Training_loss 0.55310,   Accuracy 0.80288


 53%|█████▎    | 1059/2000 [20:15:47<3:02:08, 11.61s/it]

Training_loss 0.55309,   Accuracy 0.80285


 53%|█████▎    | 1060/2000 [20:15:59<3:02:44, 11.66s/it]

Training_loss 0.55322,   Accuracy 0.80259


 53%|█████▎    | 1061/2000 [20:16:11<3:01:25, 11.59s/it]

Training_loss 0.55324,   Accuracy 0.80107


 53%|█████▎    | 1062/2000 [20:16:22<3:01:40, 11.62s/it]

Training_loss 0.55314,   Accuracy 0.80110


 53%|█████▎    | 1063/2000 [20:16:34<3:02:26, 11.68s/it]

Training_loss 0.55324,   Accuracy 0.80201


 53%|█████▎    | 1064/2000 [20:16:46<3:02:06, 11.67s/it]

Training_loss 0.55309,   Accuracy 0.80110


 53%|█████▎    | 1065/2000 [20:16:57<3:00:29, 11.58s/it]

Training_loss 0.55309,   Accuracy 0.80169


 53%|█████▎    | 1066/2000 [20:17:09<3:00:55, 11.62s/it]

Training_loss 0.55321,   Accuracy 0.80207


 53%|█████▎    | 1067/2000 [20:17:22<3:05:29, 11.93s/it]

Training_loss 0.55323,   Accuracy 0.80211


 53%|█████▎    | 1068/2000 [20:17:35<3:09:58, 12.23s/it]

Training_loss 0.55321,   Accuracy 0.80214


 53%|█████▎    | 1069/2000 [20:17:48<3:15:34, 12.60s/it]

Training_loss 0.55329,   Accuracy 0.80104


 54%|█████▎    | 1070/2000 [20:18:03<3:24:44, 13.21s/it]

Training_loss 0.55334,   Accuracy 0.80220


 54%|█████▎    | 1071/2000 [20:18:16<3:24:26, 13.20s/it]

Training_loss 0.55328,   Accuracy 0.80094


 54%|█████▎    | 1072/2000 [20:18:29<3:23:56, 13.19s/it]

Training_loss 0.55333,   Accuracy 0.80220


 54%|█████▎    | 1073/2000 [20:18:43<3:25:54, 13.33s/it]

Training_loss 0.55322,   Accuracy 0.80075


 54%|█████▎    | 1074/2000 [20:18:56<3:26:08, 13.36s/it]

Training_loss 0.55319,   Accuracy 0.80075


 54%|█████▍    | 1075/2000 [20:19:09<3:24:01, 13.23s/it]

Training_loss 0.55338,   Accuracy 0.80346


 54%|█████▍    | 1076/2000 [20:19:24<3:29:44, 13.62s/it]

Training_loss 0.55312,   Accuracy 0.80085


 54%|█████▍    | 1077/2000 [20:19:40<3:44:28, 14.59s/it]

Training_loss 0.55315,   Accuracy 0.80178


 54%|█████▍    | 1078/2000 [20:19:55<3:43:21, 14.54s/it]

Training_loss 0.55328,   Accuracy 0.80327


 54%|█████▍    | 1079/2000 [20:20:12<3:53:41, 15.22s/it]

Training_loss 0.55346,   Accuracy 0.80333


 54%|█████▍    | 1080/2000 [20:20:28<3:56:53, 15.45s/it]

Training_loss 0.55350,   Accuracy 0.80317


 54%|█████▍    | 1081/2000 [20:20:45<4:05:32, 16.03s/it]

Training_loss 0.55341,   Accuracy 0.80330


 54%|█████▍    | 1082/2000 [20:21:07<4:34:08, 17.92s/it]

Training_loss 0.55334,   Accuracy 0.80320


 54%|█████▍    | 1083/2000 [20:21:23<4:21:53, 17.14s/it]

Training_loss 0.55335,   Accuracy 0.80324


 54%|█████▍    | 1084/2000 [20:21:41<4:28:04, 17.56s/it]

Training_loss 0.55340,   Accuracy 0.80311


 54%|█████▍    | 1085/2000 [20:21:57<4:19:53, 17.04s/it]

Training_loss 0.55352,   Accuracy 0.80314


 54%|█████▍    | 1086/2000 [20:22:13<4:14:51, 16.73s/it]

Training_loss 0.55356,   Accuracy 0.80350


 54%|█████▍    | 1087/2000 [20:22:29<4:12:08, 16.57s/it]

Training_loss 0.55342,   Accuracy 0.80301


 54%|█████▍    | 1088/2000 [20:22:46<4:12:00, 16.58s/it]

Training_loss 0.55329,   Accuracy 0.80320


 54%|█████▍    | 1089/2000 [20:23:01<4:07:49, 16.32s/it]

Training_loss 0.55339,   Accuracy 0.80301


 55%|█████▍    | 1090/2000 [20:23:21<4:21:00, 17.21s/it]

Training_loss 0.55344,   Accuracy 0.80311


 55%|█████▍    | 1091/2000 [20:23:36<4:13:49, 16.75s/it]

Training_loss 0.55362,   Accuracy 0.80330


 55%|█████▍    | 1092/2000 [20:23:51<4:02:38, 16.03s/it]

Training_loss 0.55370,   Accuracy 0.80317


 55%|█████▍    | 1093/2000 [20:24:06<3:57:50, 15.73s/it]

Training_loss 0.55359,   Accuracy 0.80330


 55%|█████▍    | 1094/2000 [20:24:21<3:52:47, 15.42s/it]

Training_loss 0.55349,   Accuracy 0.80346


 55%|█████▍    | 1095/2000 [20:24:34<3:45:02, 14.92s/it]

Training_loss 0.55349,   Accuracy 0.80343


 55%|█████▍    | 1096/2000 [20:24:50<3:49:18, 15.22s/it]

Training_loss 0.55368,   Accuracy 0.80333


 55%|█████▍    | 1097/2000 [20:25:09<4:07:14, 16.43s/it]

Training_loss 0.55373,   Accuracy 0.80282


 55%|█████▍    | 1098/2000 [20:25:28<4:15:08, 16.97s/it]

Training_loss 0.55390,   Accuracy 0.80285


 55%|█████▍    | 1099/2000 [20:25:43<4:05:36, 16.36s/it]

Training_loss 0.55405,   Accuracy 0.80285


 55%|█████▌    | 1100/2000 [20:26:02<4:18:01, 17.20s/it]

Training_loss 0.55383,   Accuracy 0.80253


 55%|█████▌    | 1101/2000 [20:26:27<4:55:43, 19.74s/it]

Training_loss 0.55381,   Accuracy 0.80256


 55%|█████▌    | 1102/2000 [20:26:51<5:11:08, 20.79s/it]

Training_loss 0.55379,   Accuracy 0.80253


 55%|█████▌    | 1103/2000 [20:27:09<5:00:47, 20.12s/it]

Training_loss 0.55403,   Accuracy 0.80262


 55%|█████▌    | 1104/2000 [20:27:31<5:07:26, 20.59s/it]

Training_loss 0.55401,   Accuracy 0.80259


 55%|█████▌    | 1105/2000 [20:27:57<5:29:25, 22.08s/it]

Training_loss 0.55413,   Accuracy 0.80288


 55%|█████▌    | 1106/2000 [20:28:17<5:21:35, 21.58s/it]

Training_loss 0.55406,   Accuracy 0.80288


 55%|█████▌    | 1107/2000 [20:28:38<5:20:49, 21.56s/it]

Training_loss 0.55388,   Accuracy 0.80256


 55%|█████▌    | 1108/2000 [20:29:01<5:25:47, 21.91s/it]

Training_loss 0.55398,   Accuracy 0.80288


 55%|█████▌    | 1109/2000 [20:29:24<5:28:47, 22.14s/it]

Training_loss 0.55391,   Accuracy 0.80253


 56%|█████▌    | 1110/2000 [20:29:46<5:29:07, 22.19s/it]

Training_loss 0.55382,   Accuracy 0.80272


 56%|█████▌    | 1111/2000 [20:30:13<5:49:11, 23.57s/it]

Training_loss 0.55363,   Accuracy 0.80227


 56%|█████▌    | 1112/2000 [20:30:39<5:58:10, 24.20s/it]

Training_loss 0.55356,   Accuracy 0.80227


 56%|█████▌    | 1113/2000 [20:31:02<5:52:05, 23.82s/it]

Training_loss 0.55347,   Accuracy 0.80246


 56%|█████▌    | 1114/2000 [20:31:25<5:51:39, 23.81s/it]

Training_loss 0.55346,   Accuracy 0.80227


 56%|█████▌    | 1115/2000 [20:31:46<5:37:27, 22.88s/it]

Training_loss 0.55361,   Accuracy 0.80230


 56%|█████▌    | 1116/2000 [20:32:08<5:31:02, 22.47s/it]

Training_loss 0.55375,   Accuracy 0.80288


 56%|█████▌    | 1117/2000 [20:32:27<5:18:56, 21.67s/it]

Training_loss 0.55365,   Accuracy 0.80262


 56%|█████▌    | 1118/2000 [20:32:47<5:10:21, 21.11s/it]

Training_loss 0.55356,   Accuracy 0.80227


 56%|█████▌    | 1119/2000 [20:33:07<5:03:34, 20.68s/it]

Training_loss 0.55357,   Accuracy 0.80233


 56%|█████▌    | 1120/2000 [20:33:27<5:00:23, 20.48s/it]

Training_loss 0.55345,   Accuracy 0.80230


 56%|█████▌    | 1121/2000 [20:33:50<5:13:53, 21.43s/it]

Training_loss 0.55352,   Accuracy 0.80243


 56%|█████▌    | 1122/2000 [20:34:14<5:20:50, 21.93s/it]

Training_loss 0.55348,   Accuracy 0.80243


 56%|█████▌    | 1123/2000 [20:34:34<5:12:29, 21.38s/it]

Training_loss 0.55356,   Accuracy 0.80246


 56%|█████▌    | 1124/2000 [20:34:55<5:10:11, 21.25s/it]

Training_loss 0.55336,   Accuracy 0.80230


 56%|█████▋    | 1125/2000 [20:35:19<5:22:28, 22.11s/it]

Training_loss 0.55338,   Accuracy 0.80243


 56%|█████▋    | 1126/2000 [20:35:46<5:43:50, 23.60s/it]

Training_loss 0.55351,   Accuracy 0.80285


 56%|█████▋    | 1127/2000 [20:36:10<5:47:35, 23.89s/it]

Training_loss 0.55329,   Accuracy 0.80259


 56%|█████▋    | 1128/2000 [20:36:33<5:40:41, 23.44s/it]

Training_loss 0.55343,   Accuracy 0.80285


 56%|█████▋    | 1129/2000 [20:36:53<5:26:09, 22.47s/it]

Training_loss 0.55352,   Accuracy 0.80278


 56%|█████▋    | 1130/2000 [20:37:12<5:12:49, 21.57s/it]

Training_loss 0.55375,   Accuracy 0.80266


 57%|█████▋    | 1131/2000 [20:37:32<5:05:05, 21.07s/it]

Training_loss 0.55359,   Accuracy 0.80282


 57%|█████▋    | 1132/2000 [20:37:55<5:12:37, 21.61s/it]

Training_loss 0.55377,   Accuracy 0.80159


 57%|█████▋    | 1133/2000 [20:38:15<5:06:16, 21.20s/it]

Training_loss 0.55374,   Accuracy 0.80162


 57%|█████▋    | 1134/2000 [20:38:36<5:04:58, 21.13s/it]

Training_loss 0.55374,   Accuracy 0.80159


 57%|█████▋    | 1135/2000 [20:38:58<5:05:37, 21.20s/it]

Training_loss 0.55387,   Accuracy 0.80185


 57%|█████▋    | 1136/2000 [20:39:19<5:06:58, 21.32s/it]

Training_loss 0.55393,   Accuracy 0.80169


 57%|█████▋    | 1137/2000 [20:39:41<5:07:18, 21.37s/it]

Training_loss 0.55389,   Accuracy 0.80165


 57%|█████▋    | 1138/2000 [20:40:01<5:03:37, 21.13s/it]

Training_loss 0.55378,   Accuracy 0.80159


 57%|█████▋    | 1139/2000 [20:40:21<4:57:46, 20.75s/it]

Training_loss 0.55370,   Accuracy 0.80169


 57%|█████▋    | 1140/2000 [20:40:48<5:21:29, 22.43s/it]

Training_loss 0.55359,   Accuracy 0.80182


 57%|█████▋    | 1141/2000 [20:41:10<5:20:14, 22.37s/it]

Training_loss 0.55335,   Accuracy 0.80291


 57%|█████▋    | 1142/2000 [20:41:31<5:13:50, 21.95s/it]

Training_loss 0.55321,   Accuracy 0.80288


 57%|█████▋    | 1143/2000 [20:41:53<5:15:23, 22.08s/it]

Training_loss 0.55301,   Accuracy 0.80272


 57%|█████▋    | 1144/2000 [20:42:14<5:07:49, 21.58s/it]

Training_loss 0.55294,   Accuracy 0.80275


 57%|█████▋    | 1145/2000 [20:42:35<5:08:30, 21.65s/it]

Training_loss 0.55309,   Accuracy 0.80272


 57%|█████▋    | 1146/2000 [20:42:59<5:15:05, 22.14s/it]

Training_loss 0.55316,   Accuracy 0.80288


 57%|█████▋    | 1147/2000 [20:43:20<5:09:49, 21.79s/it]

Training_loss 0.55313,   Accuracy 0.80285


 57%|█████▋    | 1148/2000 [20:43:40<5:04:59, 21.48s/it]

Training_loss 0.55303,   Accuracy 0.80282


 57%|█████▋    | 1149/2000 [20:44:02<5:06:01, 21.58s/it]

Training_loss 0.55283,   Accuracy 0.80275


 57%|█████▊    | 1150/2000 [20:44:23<5:02:29, 21.35s/it]

Training_loss 0.55276,   Accuracy 0.80253


 58%|█████▊    | 1151/2000 [20:44:46<5:08:32, 21.81s/it]

Training_loss 0.55271,   Accuracy 0.80233


 58%|█████▊    | 1152/2000 [20:45:07<5:05:16, 21.60s/it]

Training_loss 0.55279,   Accuracy 0.80272


 58%|█████▊    | 1153/2000 [20:45:28<5:02:46, 21.45s/it]

Training_loss 0.55284,   Accuracy 0.80259


 58%|█████▊    | 1154/2000 [20:45:49<5:00:08, 21.29s/it]

Training_loss 0.55295,   Accuracy 0.80243


 58%|█████▊    | 1155/2000 [20:46:10<4:59:01, 21.23s/it]

Training_loss 0.55296,   Accuracy 0.80140


 58%|█████▊    | 1156/2000 [20:46:31<4:56:22, 21.07s/it]

Training_loss 0.55289,   Accuracy 0.80262


 58%|█████▊    | 1157/2000 [20:46:52<4:55:18, 21.02s/it]

Training_loss 0.55297,   Accuracy 0.80127


 58%|█████▊    | 1158/2000 [20:47:13<4:55:38, 21.07s/it]

Training_loss 0.55300,   Accuracy 0.80123


 58%|█████▊    | 1159/2000 [20:47:34<4:53:26, 20.94s/it]

Training_loss 0.55325,   Accuracy 0.80136


 58%|█████▊    | 1160/2000 [20:47:55<4:54:42, 21.05s/it]

Training_loss 0.55341,   Accuracy 0.80159


 58%|█████▊    | 1161/2000 [20:48:16<4:55:17, 21.12s/it]

Training_loss 0.55341,   Accuracy 0.80162


 58%|█████▊    | 1162/2000 [20:48:37<4:53:14, 21.00s/it]

Training_loss 0.55327,   Accuracy 0.80149


 58%|█████▊    | 1163/2000 [20:48:58<4:53:32, 21.04s/it]

Training_loss 0.55333,   Accuracy 0.80149


 58%|█████▊    | 1164/2000 [20:49:20<4:57:10, 21.33s/it]

Training_loss 0.55346,   Accuracy 0.80172


 58%|█████▊    | 1165/2000 [20:49:42<4:58:18, 21.44s/it]

Training_loss 0.55366,   Accuracy 0.80143


 58%|█████▊    | 1166/2000 [20:50:03<4:55:38, 21.27s/it]

Training_loss 0.55354,   Accuracy 0.80162


 58%|█████▊    | 1167/2000 [20:50:23<4:53:16, 21.12s/it]

Training_loss 0.55368,   Accuracy 0.80123


 58%|█████▊    | 1168/2000 [20:50:44<4:52:08, 21.07s/it]

Training_loss 0.55342,   Accuracy 0.80162


 58%|█████▊    | 1169/2000 [20:51:04<4:46:27, 20.68s/it]

Training_loss 0.55327,   Accuracy 0.80152


 58%|█████▊    | 1170/2000 [20:51:24<4:42:26, 20.42s/it]

Training_loss 0.55332,   Accuracy 0.80172


 59%|█████▊    | 1171/2000 [20:51:44<4:40:35, 20.31s/it]

Training_loss 0.55316,   Accuracy 0.80152


 59%|█████▊    | 1172/2000 [20:52:04<4:37:58, 20.14s/it]

Training_loss 0.55302,   Accuracy 0.80149


 59%|█████▊    | 1173/2000 [20:52:23<4:34:44, 19.93s/it]

Training_loss 0.55315,   Accuracy 0.80159


 59%|█████▊    | 1174/2000 [20:52:43<4:32:41, 19.81s/it]

Training_loss 0.55320,   Accuracy 0.80156


 59%|█████▉    | 1175/2000 [20:53:03<4:35:49, 20.06s/it]

Training_loss 0.55354,   Accuracy 0.80156


 59%|█████▉    | 1176/2000 [20:53:23<4:34:28, 19.99s/it]

Training_loss 0.55354,   Accuracy 0.80156


 59%|█████▉    | 1177/2000 [20:53:44<4:37:43, 20.25s/it]

Training_loss 0.55359,   Accuracy 0.80149


 59%|█████▉    | 1178/2000 [20:54:04<4:34:24, 20.03s/it]

Training_loss 0.55363,   Accuracy 0.80110


 59%|█████▉    | 1179/2000 [20:54:24<4:35:32, 20.14s/it]

Training_loss 0.55363,   Accuracy 0.80123


 59%|█████▉    | 1180/2000 [20:54:44<4:35:52, 20.19s/it]

Training_loss 0.55373,   Accuracy 0.80104


 59%|█████▉    | 1181/2000 [20:55:04<4:32:57, 20.00s/it]

Training_loss 0.55377,   Accuracy 0.80101


 59%|█████▉    | 1182/2000 [20:55:24<4:35:36, 20.22s/it]

Training_loss 0.55398,   Accuracy 0.80162


 59%|█████▉    | 1183/2000 [20:57:05<10:01:43, 44.19s/it]

Training_loss 0.55410,   Accuracy 0.79946


 59%|█████▉    | 1184/2000 [20:59:06<15:17:48, 67.49s/it]

Training_loss 0.55392,   Accuracy 0.80159


 59%|█████▉    | 1185/2000 [21:01:14<19:21:14, 85.49s/it]

Training_loss 0.55395,   Accuracy 0.80117


 59%|█████▉    | 1186/2000 [21:03:35<23:07:41, 102.29s/it]

Training_loss 0.55391,   Accuracy 0.80130


 59%|█████▉    | 1187/2000 [21:06:07<26:27:19, 117.15s/it]

Training_loss 0.55396,   Accuracy 0.79968


 59%|█████▉    | 1188/2000 [21:08:24<27:45:57, 123.10s/it]

Training_loss 0.55398,   Accuracy 0.79949


 59%|█████▉    | 1189/2000 [21:10:33<28:04:50, 124.65s/it]

Training_loss 0.55397,   Accuracy 0.79959


 60%|█████▉    | 1190/2000 [21:12:40<28:14:11, 125.50s/it]

Training_loss 0.55392,   Accuracy 0.80110


 60%|█████▉    | 1191/2000 [21:14:45<28:09:45, 125.32s/it]

Training_loss 0.55390,   Accuracy 0.80114


 60%|█████▉    | 1192/2000 [21:17:01<28:52:12, 128.63s/it]

Training_loss 0.55384,   Accuracy 0.80110


 60%|█████▉    | 1193/2000 [21:19:11<28:55:18, 129.02s/it]

Training_loss 0.55399,   Accuracy 0.79936


 60%|█████▉    | 1194/2000 [21:21:27<29:20:56, 131.09s/it]

Training_loss 0.55405,   Accuracy 0.79881


 60%|█████▉    | 1195/2000 [21:22:46<25:46:51, 115.29s/it]

Training_loss 0.55388,   Accuracy 0.80072


 60%|█████▉    | 1196/2000 [21:23:08<19:31:16, 87.41s/it] 

Training_loss 0.55370,   Accuracy 0.80098


 60%|█████▉    | 1197/2000 [21:23:32<15:14:52, 68.36s/it]

Training_loss 0.55355,   Accuracy 0.80110


 60%|█████▉    | 1198/2000 [21:23:51<11:55:08, 53.50s/it]

Training_loss 0.55366,   Accuracy 0.80098


 60%|█████▉    | 1199/2000 [21:24:10<9:38:28, 43.33s/it] 

Training_loss 0.55371,   Accuracy 0.80101


 60%|██████    | 1200/2000 [21:24:30<8:02:33, 36.19s/it]

Training_loss 0.55350,   Accuracy 0.80098


 60%|██████    | 1201/2000 [21:24:47<6:44:26, 30.37s/it]

Training_loss 0.55354,   Accuracy 0.80104


 60%|██████    | 1202/2000 [21:25:05<5:58:20, 26.94s/it]

Training_loss 0.55350,   Accuracy 0.80104


 60%|██████    | 1203/2000 [21:25:24<5:24:06, 24.40s/it]

Training_loss 0.55347,   Accuracy 0.80110


 60%|██████    | 1204/2000 [21:25:42<4:56:32, 22.35s/it]

Training_loss 0.55350,   Accuracy 0.80110


 60%|██████    | 1205/2000 [21:26:02<4:50:15, 21.91s/it]

Training_loss 0.55344,   Accuracy 0.80114


 60%|██████    | 1206/2000 [21:26:28<5:06:11, 23.14s/it]

Training_loss 0.55335,   Accuracy 0.80091


 60%|██████    | 1207/2000 [21:26:53<5:10:03, 23.46s/it]

Training_loss 0.55357,   Accuracy 0.80049


 60%|██████    | 1208/2000 [21:27:15<5:04:10, 23.04s/it]

Training_loss 0.55344,   Accuracy 0.80072


 60%|██████    | 1209/2000 [21:27:35<4:52:41, 22.20s/it]

Training_loss 0.55352,   Accuracy 0.80052


 60%|██████    | 1210/2000 [21:27:55<4:44:11, 21.58s/it]

Training_loss 0.55348,   Accuracy 0.80049


 61%|██████    | 1211/2000 [21:28:15<4:37:21, 21.09s/it]

Training_loss 0.55350,   Accuracy 0.79913


 61%|██████    | 1212/2000 [21:28:36<4:35:04, 20.95s/it]

Training_loss 0.55365,   Accuracy 0.79875


 61%|██████    | 1213/2000 [21:28:56<4:33:11, 20.83s/it]

Training_loss 0.55372,   Accuracy 0.79875


 61%|██████    | 1214/2000 [21:29:28<5:17:29, 24.24s/it]

Training_loss 0.55367,   Accuracy 0.79881


 61%|██████    | 1215/2000 [21:30:03<5:58:52, 27.43s/it]

Training_loss 0.55387,   Accuracy 0.79862


 61%|██████    | 1216/2000 [21:30:30<5:55:28, 27.21s/it]

Training_loss 0.55391,   Accuracy 0.79710


 61%|██████    | 1217/2000 [21:31:00<6:07:10, 28.14s/it]

Training_loss 0.55405,   Accuracy 0.79658


 61%|██████    | 1218/2000 [21:31:25<5:53:04, 27.09s/it]

Training_loss 0.55393,   Accuracy 0.79707


 61%|██████    | 1219/2000 [21:31:56<6:06:27, 28.15s/it]

Training_loss 0.55400,   Accuracy 0.79674


 61%|██████    | 1220/2000 [21:32:22<6:00:34, 27.74s/it]

Training_loss 0.55399,   Accuracy 0.79707


 61%|██████    | 1221/2000 [21:32:53<6:11:08, 28.59s/it]

Training_loss 0.55414,   Accuracy 0.79581


 61%|██████    | 1222/2000 [21:33:31<6:47:13, 31.41s/it]

Training_loss 0.55415,   Accuracy 0.79584


 61%|██████    | 1223/2000 [21:34:01<6:42:33, 31.09s/it]

Training_loss 0.55408,   Accuracy 0.79584


 61%|██████    | 1224/2000 [21:34:22<6:02:24, 28.02s/it]

Training_loss 0.55400,   Accuracy 0.79661


 61%|██████▏   | 1225/2000 [21:34:44<5:39:23, 26.28s/it]

Training_loss 0.55388,   Accuracy 0.79681


 61%|██████▏   | 1226/2000 [21:35:05<5:17:26, 24.61s/it]

Training_loss 0.55389,   Accuracy 0.79681


 61%|██████▏   | 1227/2000 [21:35:23<4:53:08, 22.75s/it]

Training_loss 0.55382,   Accuracy 0.79707


 61%|██████▏   | 1228/2000 [21:35:38<4:19:32, 20.17s/it]

Training_loss 0.55375,   Accuracy 0.79700


 61%|██████▏   | 1229/2000 [21:35:52<3:57:18, 18.47s/it]

Training_loss 0.55352,   Accuracy 0.79842


 62%|██████▏   | 1230/2000 [21:36:07<3:43:00, 17.38s/it]

Training_loss 0.55359,   Accuracy 0.79697


 62%|██████▏   | 1231/2000 [21:36:21<3:30:42, 16.44s/it]

Training_loss 0.55376,   Accuracy 0.79652


 62%|██████▏   | 1232/2000 [21:36:36<3:24:15, 15.96s/it]

Training_loss 0.55366,   Accuracy 0.79684


 62%|██████▏   | 1233/2000 [21:36:52<3:22:40, 15.85s/it]

Training_loss 0.55358,   Accuracy 0.79661


 62%|██████▏   | 1234/2000 [21:37:10<3:31:06, 16.54s/it]

Training_loss 0.55346,   Accuracy 0.79700


 62%|██████▏   | 1235/2000 [21:37:28<3:35:47, 16.92s/it]

Training_loss 0.55348,   Accuracy 0.79707


 62%|██████▏   | 1236/2000 [21:37:42<3:27:32, 16.30s/it]

Training_loss 0.55339,   Accuracy 0.79700


 62%|██████▏   | 1237/2000 [21:38:04<3:46:43, 17.83s/it]

Training_loss 0.55328,   Accuracy 0.79700


 62%|██████▏   | 1238/2000 [21:38:21<3:44:15, 17.66s/it]

Training_loss 0.55302,   Accuracy 0.79878


 62%|██████▏   | 1239/2000 [21:38:42<3:58:03, 18.77s/it]

Training_loss 0.55307,   Accuracy 0.79881


 62%|██████▏   | 1240/2000 [21:39:02<4:00:32, 18.99s/it]

Training_loss 0.55298,   Accuracy 0.79884


 62%|██████▏   | 1241/2000 [21:39:16<3:43:19, 17.65s/it]

Training_loss 0.55302,   Accuracy 0.79865


 62%|██████▏   | 1242/2000 [21:39:33<3:38:53, 17.33s/it]

Training_loss 0.55294,   Accuracy 0.79878


 62%|██████▏   | 1243/2000 [21:39:51<3:41:23, 17.55s/it]

Training_loss 0.55310,   Accuracy 0.79878


 62%|██████▏   | 1244/2000 [21:40:06<3:29:42, 16.64s/it]

Training_loss 0.55314,   Accuracy 0.79678


 62%|██████▏   | 1245/2000 [21:40:21<3:26:12, 16.39s/it]

Training_loss 0.55307,   Accuracy 0.79842


 62%|██████▏   | 1246/2000 [21:40:36<3:20:04, 15.92s/it]

Training_loss 0.55304,   Accuracy 0.79846


 62%|██████▏   | 1247/2000 [21:40:58<3:43:15, 17.79s/it]

Training_loss 0.55323,   Accuracy 0.79665


 62%|██████▏   | 1248/2000 [21:41:26<4:19:22, 20.69s/it]

Training_loss 0.55321,   Accuracy 0.79665


 62%|██████▏   | 1249/2000 [21:41:50<4:31:30, 21.69s/it]

Training_loss 0.55328,   Accuracy 0.79639


 62%|██████▎   | 1250/2000 [21:42:09<4:21:06, 20.89s/it]

Training_loss 0.55349,   Accuracy 0.79561


 63%|██████▎   | 1251/2000 [21:42:25<4:04:22, 19.58s/it]

Training_loss 0.55357,   Accuracy 0.79587


 63%|██████▎   | 1252/2000 [21:42:42<3:52:41, 18.67s/it]

Training_loss 0.55364,   Accuracy 0.79610


 63%|██████▎   | 1253/2000 [21:42:56<3:36:17, 17.37s/it]

Training_loss 0.55353,   Accuracy 0.79581


 63%|██████▎   | 1254/2000 [21:43:16<3:43:27, 17.97s/it]

Training_loss 0.55338,   Accuracy 0.79561


 63%|██████▎   | 1255/2000 [21:43:38<3:58:25, 19.20s/it]

Training_loss 0.55331,   Accuracy 0.79565


 63%|██████▎   | 1256/2000 [21:43:54<3:47:48, 18.37s/it]

Training_loss 0.55339,   Accuracy 0.79561


 63%|██████▎   | 1257/2000 [21:44:28<4:46:02, 23.10s/it]

Training_loss 0.55309,   Accuracy 0.79571


 63%|██████▎   | 1258/2000 [21:44:55<4:59:36, 24.23s/it]

Training_loss 0.55289,   Accuracy 0.79691


 63%|██████▎   | 1259/2000 [21:45:18<4:53:05, 23.73s/it]

Training_loss 0.55264,   Accuracy 0.79839


 63%|██████▎   | 1260/2000 [21:45:39<4:43:38, 23.00s/it]

Training_loss 0.55245,   Accuracy 0.79855


 63%|██████▎   | 1261/2000 [21:46:02<4:41:29, 22.85s/it]

Training_loss 0.55250,   Accuracy 0.79836


 63%|██████▎   | 1262/2000 [21:46:18<4:17:52, 20.97s/it]

Training_loss 0.55261,   Accuracy 0.79839


 63%|██████▎   | 1263/2000 [21:46:32<3:49:55, 18.72s/it]

Training_loss 0.55273,   Accuracy 0.79694


 63%|██████▎   | 1264/2000 [21:46:46<3:34:10, 17.46s/it]

Training_loss 0.55269,   Accuracy 0.79713


 63%|██████▎   | 1265/2000 [21:47:03<3:33:17, 17.41s/it]

Training_loss 0.55253,   Accuracy 0.79839


 63%|██████▎   | 1266/2000 [21:47:27<3:56:10, 19.31s/it]

Training_loss 0.55237,   Accuracy 0.79842


 63%|██████▎   | 1267/2000 [21:47:46<3:53:43, 19.13s/it]

Training_loss 0.55249,   Accuracy 0.79839


 63%|██████▎   | 1268/2000 [21:48:00<3:34:51, 17.61s/it]

Training_loss 0.55257,   Accuracy 0.79694


 63%|██████▎   | 1269/2000 [21:48:17<3:32:49, 17.47s/it]

Training_loss 0.55268,   Accuracy 0.79681


 64%|██████▎   | 1270/2000 [21:48:35<3:33:41, 17.56s/it]

Training_loss 0.55286,   Accuracy 0.79613


 64%|██████▎   | 1271/2000 [21:48:51<3:27:01, 17.04s/it]

Training_loss 0.55287,   Accuracy 0.79642


 64%|██████▎   | 1272/2000 [21:49:09<3:31:41, 17.45s/it]

Training_loss 0.55290,   Accuracy 0.79574


 64%|██████▎   | 1273/2000 [21:49:24<3:23:01, 16.76s/it]

Training_loss 0.55313,   Accuracy 0.79568


 64%|██████▎   | 1274/2000 [21:49:41<3:23:57, 16.86s/it]

Training_loss 0.55325,   Accuracy 0.79565


 64%|██████▍   | 1275/2000 [21:49:59<3:28:23, 17.25s/it]

Training_loss 0.55329,   Accuracy 0.79568


 64%|██████▍   | 1276/2000 [21:50:16<3:25:34, 17.04s/it]

Training_loss 0.55350,   Accuracy 0.79513


 64%|██████▍   | 1277/2000 [21:50:34<3:29:54, 17.42s/it]

Training_loss 0.55351,   Accuracy 0.79506


 64%|██████▍   | 1278/2000 [21:50:54<3:37:29, 18.07s/it]

Training_loss 0.55339,   Accuracy 0.79500


 64%|██████▍   | 1279/2000 [21:51:10<3:30:28, 17.52s/it]

Training_loss 0.55338,   Accuracy 0.79500


 64%|██████▍   | 1280/2000 [21:51:27<3:27:31, 17.29s/it]

Training_loss 0.55356,   Accuracy 0.79371


 64%|██████▍   | 1281/2000 [21:51:45<3:28:32, 17.40s/it]

Training_loss 0.55354,   Accuracy 0.79367


 64%|██████▍   | 1282/2000 [21:52:06<3:41:37, 18.52s/it]

Training_loss 0.55338,   Accuracy 0.79510


 64%|██████▍   | 1283/2000 [21:52:23<3:35:41, 18.05s/it]

Training_loss 0.55341,   Accuracy 0.79497


 64%|██████▍   | 1284/2000 [21:52:39<3:30:09, 17.61s/it]

Training_loss 0.55338,   Accuracy 0.79497


 64%|██████▍   | 1285/2000 [21:52:55<3:24:22, 17.15s/it]

Training_loss 0.55345,   Accuracy 0.79367


 64%|██████▍   | 1286/2000 [21:53:11<3:17:57, 16.64s/it]

Training_loss 0.55334,   Accuracy 0.79384


 64%|██████▍   | 1287/2000 [21:53:28<3:20:30, 16.87s/it]

Training_loss 0.55343,   Accuracy 0.79364


 64%|██████▍   | 1288/2000 [21:53:45<3:20:53, 16.93s/it]

Training_loss 0.55355,   Accuracy 0.79342


 64%|██████▍   | 1289/2000 [21:54:00<3:13:58, 16.37s/it]

Training_loss 0.55342,   Accuracy 0.79351


 64%|██████▍   | 1290/2000 [21:54:15<3:08:10, 15.90s/it]

Training_loss 0.55335,   Accuracy 0.79358


 65%|██████▍   | 1291/2000 [21:54:30<3:03:58, 15.57s/it]

Training_loss 0.55348,   Accuracy 0.79348


 65%|██████▍   | 1292/2000 [21:54:45<3:01:09, 15.35s/it]

Training_loss 0.55347,   Accuracy 0.79338


 65%|██████▍   | 1293/2000 [21:55:00<3:00:17, 15.30s/it]

Training_loss 0.55337,   Accuracy 0.79338


 65%|██████▍   | 1294/2000 [21:55:14<2:55:45, 14.94s/it]

Training_loss 0.55331,   Accuracy 0.79355


 65%|██████▍   | 1295/2000 [21:55:29<2:54:55, 14.89s/it]

Training_loss 0.55315,   Accuracy 0.79364


 65%|██████▍   | 1296/2000 [21:55:43<2:52:08, 14.67s/it]

Training_loss 0.55299,   Accuracy 0.79387


 65%|██████▍   | 1297/2000 [21:55:57<2:49:54, 14.50s/it]

Training_loss 0.55310,   Accuracy 0.79364


 65%|██████▍   | 1298/2000 [21:56:11<2:48:49, 14.43s/it]

Training_loss 0.55298,   Accuracy 0.79367


 65%|██████▍   | 1299/2000 [21:56:25<2:47:00, 14.29s/it]

Training_loss 0.55277,   Accuracy 0.79500


 65%|██████▌   | 1300/2000 [21:56:40<2:49:16, 14.51s/it]

Training_loss 0.55259,   Accuracy 0.79503


 65%|██████▌   | 1301/2000 [21:56:57<2:57:03, 15.20s/it]

Training_loss 0.55251,   Accuracy 0.79506


 65%|██████▌   | 1302/2000 [21:57:16<3:08:56, 16.24s/it]

Training_loss 0.55255,   Accuracy 0.79503


 65%|██████▌   | 1303/2000 [21:57:33<3:12:05, 16.54s/it]

Training_loss 0.55266,   Accuracy 0.79497


 65%|██████▌   | 1304/2000 [21:57:49<3:09:02, 16.30s/it]

Training_loss 0.55275,   Accuracy 0.79358


 65%|██████▌   | 1305/2000 [21:58:03<3:00:43, 15.60s/it]

Training_loss 0.55294,   Accuracy 0.79351


 65%|██████▌   | 1306/2000 [21:58:16<2:54:12, 15.06s/it]

Training_loss 0.55285,   Accuracy 0.79351


 65%|██████▌   | 1307/2000 [21:58:31<2:52:03, 14.90s/it]

Training_loss 0.55285,   Accuracy 0.79351


 65%|██████▌   | 1308/2000 [21:58:46<2:50:48, 14.81s/it]

Training_loss 0.55286,   Accuracy 0.79355


 65%|██████▌   | 1309/2000 [21:59:01<2:51:09, 14.86s/it]

Training_loss 0.55284,   Accuracy 0.79325


 66%|██████▌   | 1310/2000 [21:59:15<2:49:36, 14.75s/it]

Training_loss 0.55279,   Accuracy 0.79325


 66%|██████▌   | 1311/2000 [21:59:32<2:57:07, 15.42s/it]

Training_loss 0.55297,   Accuracy 0.79332


 66%|██████▌   | 1312/2000 [21:59:52<3:10:49, 16.64s/it]

Training_loss 0.55301,   Accuracy 0.79229


 66%|██████▌   | 1313/2000 [22:00:09<3:13:07, 16.87s/it]

Training_loss 0.55305,   Accuracy 0.79225


 66%|██████▌   | 1314/2000 [22:00:23<3:03:05, 16.01s/it]

Training_loss 0.55303,   Accuracy 0.79225


 66%|██████▌   | 1315/2000 [22:00:37<2:57:19, 15.53s/it]

Training_loss 0.55317,   Accuracy 0.79196


 66%|██████▌   | 1316/2000 [22:00:51<2:50:49, 14.98s/it]

Training_loss 0.55321,   Accuracy 0.79196


 66%|██████▌   | 1317/2000 [22:01:05<2:48:16, 14.78s/it]

Training_loss 0.55298,   Accuracy 0.79225


 66%|██████▌   | 1318/2000 [22:01:21<2:50:12, 14.97s/it]

Training_loss 0.55312,   Accuracy 0.79200


 66%|██████▌   | 1319/2000 [22:01:34<2:44:21, 14.48s/it]

Training_loss 0.55315,   Accuracy 0.79196


 66%|██████▌   | 1320/2000 [22:02:28<4:58:40, 26.35s/it]

Training_loss 0.55315,   Accuracy 0.79187


 66%|██████▌   | 1321/2000 [22:04:57<11:53:03, 63.01s/it]

Training_loss 0.55326,   Accuracy 0.79151


 66%|██████▌   | 1322/2000 [22:07:16<16:09:23, 85.79s/it]

Training_loss 0.55318,   Accuracy 0.79148


 66%|██████▌   | 1323/2000 [22:10:45<23:05:27, 122.79s/it]

Training_loss 0.55313,   Accuracy 0.79187


 66%|██████▌   | 1324/2000 [22:12:54<23:25:31, 124.75s/it]

Training_loss 0.55318,   Accuracy 0.79148


 66%|██████▋   | 1325/2000 [22:14:42<22:25:46, 119.62s/it]

Training_loss 0.55317,   Accuracy 0.79158


 66%|██████▋   | 1326/2000 [22:16:30<21:46:24, 116.30s/it]

Training_loss 0.55338,   Accuracy 0.79141


 66%|██████▋   | 1327/2000 [22:18:22<21:28:43, 114.89s/it]

Training_loss 0.55328,   Accuracy 0.79145


 66%|██████▋   | 1328/2000 [22:20:11<21:07:30, 113.17s/it]

Training_loss 0.55295,   Accuracy 0.79190


 66%|██████▋   | 1329/2000 [22:22:01<20:54:47, 112.20s/it]

Training_loss 0.55297,   Accuracy 0.79187


 66%|██████▋   | 1330/2000 [22:23:47<20:31:36, 110.29s/it]

Training_loss 0.55289,   Accuracy 0.79209


 67%|██████▋   | 1331/2000 [22:25:33<20:16:14, 109.08s/it]

Training_loss 0.55272,   Accuracy 0.79206


 67%|██████▋   | 1332/2000 [22:27:27<20:29:27, 110.43s/it]

Training_loss 0.55290,   Accuracy 0.79167


 67%|██████▋   | 1333/2000 [22:29:16<20:24:04, 110.11s/it]

Training_loss 0.55276,   Accuracy 0.79203


 67%|██████▋   | 1334/2000 [22:31:08<20:29:47, 110.79s/it]

Training_loss 0.55296,   Accuracy 0.79187


 67%|██████▋   | 1335/2000 [22:33:03<20:40:05, 111.89s/it]

Training_loss 0.55307,   Accuracy 0.79154


 67%|██████▋   | 1336/2000 [22:34:54<20:34:27, 111.55s/it]

Training_loss 0.55299,   Accuracy 0.79145


 67%|██████▋   | 1337/2000 [22:36:52<20:54:02, 113.49s/it]

Training_loss 0.55294,   Accuracy 0.79151


 67%|██████▋   | 1338/2000 [22:38:56<21:27:33, 116.70s/it]

Training_loss 0.55297,   Accuracy 0.79151


 67%|██████▋   | 1339/2000 [22:40:44<20:56:05, 114.02s/it]

Training_loss 0.55276,   Accuracy 0.79161


 67%|██████▋   | 1340/2000 [22:42:45<21:20:09, 116.38s/it]

Training_loss 0.55283,   Accuracy 0.79151


 67%|██████▋   | 1341/2000 [22:44:40<21:12:20, 115.84s/it]

Training_loss 0.55284,   Accuracy 0.79154


 67%|██████▋   | 1342/2000 [22:46:38<21:17:07, 116.45s/it]

Training_loss 0.55272,   Accuracy 0.79183


 67%|██████▋   | 1343/2000 [22:48:28<20:55:32, 114.66s/it]

Training_loss 0.55267,   Accuracy 0.79193


 67%|██████▋   | 1344/2000 [22:50:17<20:34:33, 112.92s/it]

Training_loss 0.55267,   Accuracy 0.79190


 67%|██████▋   | 1345/2000 [22:52:14<20:43:31, 113.91s/it]

Training_loss 0.55267,   Accuracy 0.79190


 67%|██████▋   | 1346/2000 [22:54:10<20:50:50, 114.76s/it]

Training_loss 0.55245,   Accuracy 0.79203


 67%|██████▋   | 1347/2000 [22:56:13<21:14:40, 117.12s/it]

Training_loss 0.55254,   Accuracy 0.79203


 67%|██████▋   | 1348/2000 [22:58:09<21:08:23, 116.72s/it]

Training_loss 0.55256,   Accuracy 0.79187


 67%|██████▋   | 1349/2000 [23:00:12<21:26:49, 118.60s/it]

Training_loss 0.55249,   Accuracy 0.79206


 68%|██████▊   | 1350/2000 [23:02:10<21:23:54, 118.51s/it]

Training_loss 0.55251,   Accuracy 0.79206


 68%|██████▊   | 1351/2000 [23:04:01<20:57:30, 116.26s/it]

Training_loss 0.55242,   Accuracy 0.79206


 68%|██████▊   | 1352/2000 [23:05:48<20:26:08, 113.53s/it]

Training_loss 0.55237,   Accuracy 0.79177


 68%|██████▊   | 1353/2000 [23:07:37<20:08:17, 112.05s/it]

Training_loss 0.55229,   Accuracy 0.79183


 68%|██████▊   | 1354/2000 [23:09:21<19:42:33, 109.84s/it]

Training_loss 0.55246,   Accuracy 0.79158


 68%|██████▊   | 1355/2000 [23:11:09<19:32:50, 109.10s/it]

Training_loss 0.55256,   Accuracy 0.79138


 68%|██████▊   | 1356/2000 [23:12:56<19:23:53, 108.44s/it]

Training_loss 0.55275,   Accuracy 0.79145


 68%|██████▊   | 1357/2000 [23:14:44<19:20:57, 108.33s/it]

Training_loss 0.55281,   Accuracy 0.79145


 68%|██████▊   | 1358/2000 [23:16:28<19:04:38, 106.98s/it]

Training_loss 0.55305,   Accuracy 0.79138


 68%|██████▊   | 1359/2000 [23:18:15<19:03:26, 107.03s/it]

Training_loss 0.55292,   Accuracy 0.79125


 68%|██████▊   | 1360/2000 [23:20:05<19:10:25, 107.85s/it]

Training_loss 0.55295,   Accuracy 0.79141


 68%|██████▊   | 1361/2000 [23:21:47<18:51:32, 106.25s/it]

Training_loss 0.55298,   Accuracy 0.79141


 68%|██████▊   | 1362/2000 [23:23:42<19:18:46, 108.98s/it]

Training_loss 0.55305,   Accuracy 0.79109


 68%|██████▊   | 1363/2000 [23:25:26<19:01:24, 107.51s/it]

Training_loss 0.55293,   Accuracy 0.79145


 68%|██████▊   | 1364/2000 [23:27:15<19:03:08, 107.84s/it]

Training_loss 0.55323,   Accuracy 0.79057


 68%|██████▊   | 1365/2000 [23:29:03<19:02:24, 107.94s/it]

Training_loss 0.55345,   Accuracy 0.79057


 68%|██████▊   | 1366/2000 [23:30:49<18:52:53, 107.21s/it]

Training_loss 0.55329,   Accuracy 0.79067


 68%|██████▊   | 1367/2000 [23:32:39<18:59:29, 108.01s/it]

Training_loss 0.55339,   Accuracy 0.79054


 68%|██████▊   | 1368/2000 [23:34:32<19:15:55, 109.74s/it]

Training_loss 0.55341,   Accuracy 0.79057


 68%|██████▊   | 1369/2000 [23:36:22<19:12:34, 109.60s/it]

Training_loss 0.55351,   Accuracy 0.79057


 68%|██████▊   | 1370/2000 [23:38:12<19:13:20, 109.84s/it]

Training_loss 0.55364,   Accuracy 0.79022


 69%|██████▊   | 1371/2000 [23:38:57<15:47:31, 90.38s/it] 

Training_loss 0.55392,   Accuracy 0.78915


 69%|██████▊   | 1372/2000 [23:39:09<11:38:58, 66.78s/it]

Training_loss 0.55397,   Accuracy 0.78902


 69%|██████▊   | 1373/2000 [23:39:25<8:57:53, 51.47s/it] 

Training_loss 0.55408,   Accuracy 0.78767


 69%|██████▊   | 1374/2000 [23:41:14<11:57:11, 68.74s/it]

Training_loss 0.55387,   Accuracy 0.78912


 69%|██████▉   | 1375/2000 [23:43:10<14:25:55, 83.13s/it]

Training_loss 0.55398,   Accuracy 0.78776


 69%|██████▉   | 1376/2000 [23:45:05<16:04:37, 92.75s/it]

Training_loss 0.55397,   Accuracy 0.78770


 69%|██████▉   | 1377/2000 [23:46:55<16:56:18, 97.88s/it]

Training_loss 0.55410,   Accuracy 0.78747


 69%|██████▉   | 1378/2000 [23:48:39<17:13:54, 99.73s/it]

Training_loss 0.55402,   Accuracy 0.78757


 69%|██████▉   | 1379/2000 [23:50:34<17:59:08, 104.27s/it]

Training_loss 0.55368,   Accuracy 0.78935


 69%|██████▉   | 1380/2000 [23:52:18<17:56:29, 104.18s/it]

Training_loss 0.55360,   Accuracy 0.78935


 69%|██████▉   | 1381/2000 [23:54:04<17:59:11, 104.61s/it]

Training_loss 0.55360,   Accuracy 0.78935


 69%|██████▉   | 1382/2000 [23:55:51<18:05:46, 105.42s/it]

Training_loss 0.55370,   Accuracy 0.78906


 69%|██████▉   | 1383/2000 [23:57:40<18:14:39, 106.45s/it]

Training_loss 0.55367,   Accuracy 0.78925


 69%|██████▉   | 1384/2000 [23:59:26<18:10:41, 106.24s/it]

Training_loss 0.55337,   Accuracy 0.79002


 69%|██████▉   | 1385/2000 [24:01:16<18:21:16, 107.44s/it]

Training_loss 0.55325,   Accuracy 0.79038


 69%|██████▉   | 1386/2000 [24:03:17<19:01:13, 111.52s/it]

Training_loss 0.55317,   Accuracy 0.79044


 69%|██████▉   | 1387/2000 [24:05:26<19:53:47, 116.85s/it]

Training_loss 0.55310,   Accuracy 0.79057


 69%|██████▉   | 1388/2000 [24:07:31<20:16:51, 119.30s/it]

Training_loss 0.55307,   Accuracy 0.79054


 69%|██████▉   | 1389/2000 [24:09:40<20:43:44, 122.13s/it]

Training_loss 0.55315,   Accuracy 0.79038


 70%|██████▉   | 1390/2000 [24:11:40<20:35:20, 121.51s/it]

Training_loss 0.55337,   Accuracy 0.78973


 70%|██████▉   | 1391/2000 [24:13:43<20:38:54, 122.06s/it]

Training_loss 0.55336,   Accuracy 0.78967


 70%|██████▉   | 1392/2000 [24:15:32<19:55:04, 117.94s/it]

Training_loss 0.55354,   Accuracy 0.78773


 70%|██████▉   | 1393/2000 [24:17:16<19:12:51, 113.96s/it]

Training_loss 0.55349,   Accuracy 0.78783


 70%|██████▉   | 1394/2000 [24:19:02<18:44:22, 111.32s/it]

Training_loss 0.55350,   Accuracy 0.78773


 70%|██████▉   | 1395/2000 [24:20:48<18:27:27, 109.83s/it]

Training_loss 0.55354,   Accuracy 0.78767


 70%|██████▉   | 1396/2000 [24:22:34<18:15:32, 108.83s/it]

Training_loss 0.55364,   Accuracy 0.78754


 70%|██████▉   | 1397/2000 [24:24:23<18:13:52, 108.84s/it]

Training_loss 0.55375,   Accuracy 0.78705


 70%|██████▉   | 1398/2000 [24:26:09<18:02:39, 107.91s/it]

Training_loss 0.55413,   Accuracy 0.78486


 70%|██████▉   | 1399/2000 [24:27:58<18:04:36, 108.28s/it]

Training_loss 0.55415,   Accuracy 0.78385


 70%|███████   | 1400/2000 [24:29:52<18:20:50, 110.08s/it]

Training_loss 0.55403,   Accuracy 0.78541


 70%|███████   | 1401/2000 [24:31:36<18:00:06, 108.19s/it]

Training_loss 0.55397,   Accuracy 0.78544


 70%|███████   | 1402/2000 [24:33:25<17:59:58, 108.36s/it]

Training_loss 0.55409,   Accuracy 0.78486


 70%|███████   | 1403/2000 [24:35:10<17:49:33, 107.49s/it]

Training_loss 0.55417,   Accuracy 0.78308


 70%|███████   | 1404/2000 [24:36:58<17:49:20, 107.65s/it]

Training_loss 0.55403,   Accuracy 0.78486


 70%|███████   | 1405/2000 [24:38:52<18:06:15, 109.54s/it]

Training_loss 0.55404,   Accuracy 0.78486


 70%|███████   | 1406/2000 [24:40:44<18:10:02, 110.11s/it]

Training_loss 0.55405,   Accuracy 0.78486


 70%|███████   | 1407/2000 [24:42:49<18:52:37, 114.60s/it]

Training_loss 0.55412,   Accuracy 0.78308


 70%|███████   | 1408/2000 [24:44:44<18:53:04, 114.84s/it]

Training_loss 0.55405,   Accuracy 0.78486


 70%|███████   | 1409/2000 [24:46:48<19:18:22, 117.60s/it]

Training_loss 0.55401,   Accuracy 0.78482


 70%|███████   | 1410/2000 [24:48:50<19:27:00, 118.68s/it]

Training_loss 0.55373,   Accuracy 0.78663


 71%|███████   | 1411/2000 [24:50:45<19:15:27, 117.70s/it]

Training_loss 0.55408,   Accuracy 0.78308


 71%|███████   | 1412/2000 [24:52:37<18:56:57, 116.02s/it]

Training_loss 0.55401,   Accuracy 0.78305


 71%|███████   | 1413/2000 [24:54:21<18:18:12, 112.25s/it]

Training_loss 0.55400,   Accuracy 0.78389


 71%|███████   | 1414/2000 [24:56:07<18:00:47, 110.66s/it]

Training_loss 0.55397,   Accuracy 0.78392


 71%|███████   | 1415/2000 [24:58:00<18:05:36, 111.34s/it]

Training_loss 0.55407,   Accuracy 0.78292


 71%|███████   | 1416/2000 [24:59:48<17:51:53, 110.13s/it]

Training_loss 0.55401,   Accuracy 0.78285


 71%|███████   | 1417/2000 [25:01:40<17:57:45, 110.92s/it]

Training_loss 0.55405,   Accuracy 0.78295


 71%|███████   | 1418/2000 [25:03:25<17:38:09, 109.09s/it]

Training_loss 0.55402,   Accuracy 0.78292


 71%|███████   | 1419/2000 [25:05:12<17:28:40, 108.30s/it]

Training_loss 0.55396,   Accuracy 0.78314


 71%|███████   | 1420/2000 [25:07:01<17:30:19, 108.65s/it]

Training_loss 0.55381,   Accuracy 0.78402


 71%|███████   | 1421/2000 [25:09:01<17:59:59, 111.92s/it]

Training_loss 0.55366,   Accuracy 0.78398


 71%|███████   | 1422/2000 [25:10:50<17:50:49, 111.16s/it]

Training_loss 0.55351,   Accuracy 0.78595


 71%|███████   | 1423/2000 [25:12:43<17:52:26, 111.52s/it]

Training_loss 0.55330,   Accuracy 0.78692


 71%|███████   | 1424/2000 [25:14:35<17:52:34, 111.73s/it]

Training_loss 0.55320,   Accuracy 0.78692


 71%|███████▏  | 1425/2000 [25:16:24<17:42:32, 110.87s/it]

Training_loss 0.55331,   Accuracy 0.78621


 71%|███████▏  | 1426/2000 [25:18:09<17:24:39, 109.20s/it]

Training_loss 0.55309,   Accuracy 0.78696


 71%|███████▏  | 1427/2000 [25:20:00<17:27:14, 109.66s/it]

Training_loss 0.55298,   Accuracy 0.78699


 71%|███████▏  | 1428/2000 [25:21:44<17:09:51, 108.03s/it]

Training_loss 0.55313,   Accuracy 0.78692


 71%|███████▏  | 1429/2000 [25:23:28<16:57:39, 106.93s/it]

Training_loss 0.55302,   Accuracy 0.78705


 72%|███████▏  | 1430/2000 [25:25:12<16:46:12, 105.92s/it]

Training_loss 0.55289,   Accuracy 0.78834


 72%|███████▏  | 1431/2000 [25:27:01<16:53:10, 106.84s/it]

Training_loss 0.55298,   Accuracy 0.78699


 72%|███████▏  | 1432/2000 [25:28:51<17:00:10, 107.77s/it]

Training_loss 0.55321,   Accuracy 0.78589


 72%|███████▏  | 1433/2000 [25:30:38<16:55:51, 107.50s/it]

Training_loss 0.55322,   Accuracy 0.78579


 72%|███████▏  | 1434/2000 [25:32:25<16:53:56, 107.48s/it]

Training_loss 0.55322,   Accuracy 0.78573


 72%|███████▏  | 1435/2000 [25:34:12<16:51:44, 107.44s/it]

Training_loss 0.55309,   Accuracy 0.78589


 72%|███████▏  | 1436/2000 [25:35:58<16:46:10, 107.04s/it]

Training_loss 0.55322,   Accuracy 0.78576


 72%|███████▏  | 1437/2000 [25:37:49<16:53:03, 107.96s/it]

Training_loss 0.55301,   Accuracy 0.78592


 72%|███████▏  | 1438/2000 [25:39:34<16:43:38, 107.15s/it]

Training_loss 0.55323,   Accuracy 0.78579


 72%|███████▏  | 1439/2000 [25:41:32<17:12:57, 110.48s/it]

Training_loss 0.55329,   Accuracy 0.78457


 72%|███████▏  | 1440/2000 [25:43:32<17:38:27, 113.41s/it]

Training_loss 0.55326,   Accuracy 0.78450


 72%|███████▏  | 1441/2000 [25:45:28<17:43:30, 114.15s/it]

Training_loss 0.55334,   Accuracy 0.78450


 72%|███████▏  | 1442/2000 [25:47:17<17:27:54, 112.68s/it]

Training_loss 0.55334,   Accuracy 0.78450


 72%|███████▏  | 1443/2000 [25:49:03<17:06:39, 110.59s/it]

Training_loss 0.55336,   Accuracy 0.78366


 72%|███████▏  | 1444/2000 [25:50:51<16:55:44, 109.61s/it]

Training_loss 0.55355,   Accuracy 0.78192


 72%|███████▏  | 1445/2000 [25:52:34<16:35:42, 107.64s/it]

Training_loss 0.55362,   Accuracy 0.78201


 72%|███████▏  | 1446/2000 [25:54:24<16:41:03, 108.42s/it]

Training_loss 0.55364,   Accuracy 0.78201


 72%|███████▏  | 1447/2000 [25:56:12<16:37:52, 108.27s/it]

Training_loss 0.55375,   Accuracy 0.78208


 72%|███████▏  | 1448/2000 [25:58:10<17:02:47, 111.17s/it]

Training_loss 0.55379,   Accuracy 0.78198


 72%|███████▏  | 1449/2000 [25:58:26<12:39:36, 82.72s/it] 

Training_loss 0.55358,   Accuracy 0.78192


 72%|███████▎  | 1450/2000 [25:58:42<9:34:27, 62.67s/it] 

Training_loss 0.55368,   Accuracy 0.78214


 73%|███████▎  | 1451/2000 [25:59:01<7:34:33, 49.68s/it]

Training_loss 0.55386,   Accuracy 0.78195


 73%|███████▎  | 1452/2000 [25:59:14<5:53:06, 38.66s/it]

Training_loss 0.55384,   Accuracy 0.78188


 73%|███████▎  | 1453/2000 [25:59:27<4:43:06, 31.05s/it]

Training_loss 0.55375,   Accuracy 0.78188


 73%|███████▎  | 1454/2000 [25:59:41<3:54:38, 25.78s/it]

Training_loss 0.55370,   Accuracy 0.78185


 73%|███████▎  | 1455/2000 [25:59:57<3:28:47, 22.99s/it]

Training_loss 0.55377,   Accuracy 0.78195


 73%|███████▎  | 1456/2000 [26:00:10<2:59:34, 19.81s/it]

Training_loss 0.55387,   Accuracy 0.78124


 73%|███████▎  | 1457/2000 [26:00:23<2:41:27, 17.84s/it]

Training_loss 0.55390,   Accuracy 0.78124


 73%|███████▎  | 1458/2000 [26:00:36<2:26:52, 16.26s/it]

Training_loss 0.55396,   Accuracy 0.78117


 73%|███████▎  | 1459/2000 [26:00:51<2:24:06, 15.98s/it]

Training_loss 0.55381,   Accuracy 0.78117


 73%|███████▎  | 1460/2000 [26:01:04<2:14:36, 14.96s/it]

Training_loss 0.55374,   Accuracy 0.78179


 73%|███████▎  | 1461/2000 [26:01:17<2:09:05, 14.37s/it]

Training_loss 0.55357,   Accuracy 0.78205


 73%|███████▎  | 1462/2000 [26:01:30<2:07:06, 14.18s/it]

Training_loss 0.55365,   Accuracy 0.78179


In [None]:
#plot.plot(test_loss)
print(parameters_to_vector(models[19].parameters()), W[1])

In [None]:
test_loss = np.array(test_loss)
total_rel_error = np.array(total_rel_error)

in_cluster_proj_norm = np.array(in_cluster_proj_norm)
out_cluster_proj_norm = np.array(out_cluster_proj_norm)
in_cluster_proj_diff_norm = np.array(in_cluster_proj_diff_norm)
out_cluster_proj_diff_norm = np.array(out_cluster_proj_diff_norm)


In [None]:
'''
  0%|          | 1/2000 [00:12<6:59:49, 12.60s/it]
Training_loss 0.69317,   Accuracy 0.52177
  0%|          | 2/2000 [00:25<6:58:41, 12.57s/it]
Training_loss 0.69256,   Accuracy 0.52523
  0%|          | 3/2000 [00:37<6:55:30, 12.48s/it]
Training_loss 0.69282,   Accuracy 0.52400
  0%|          | 4/2000 [00:50<6:56:56, 12.53s/it]
Training_loss 0.69216,   Accuracy 0.52552
  0%|          | 5/2000 [01:05<7:35:34, 13.70s/it]
Training_loss 0.69178,   Accuracy 0.52765
  0%|          | 6/2000 [01:18<7:24:07, 13.36s/it]
Training_loss 0.69037,   Accuracy 0.55240
  0%|          | 7/2000 [01:32<7:32:02, 13.61s/it]
Training_loss 0.68986,   Accuracy 0.55653
  0%|          | 8/2000 [01:47<7:42:39, 13.94s/it]
Training_loss 0.68921,   Accuracy 0.56338
'''

In [None]:
np.save( 'training_loss_sheave_fml' + str(lamda).replace('.', '_')+ '_pout' + str(pout).replace('.', '_'), test_loss)
np.save('relative_error_sheave_fml' + str(lamda).replace('.', '_')+ '_pout' + str(pout).replace('.', '_'), total_rel_error)
np.save( 'in_cluster_proj_norm_sheave_fml' + str(lamda).replace('.', '_')+ '_pout' + str(pout).replace('.', '_'), in_cluster_proj_norm)
np.save('out_cluster_proj_norm_sheave_fml' + str(lamda).replace('.', '_')+ '_pout' + str(pout).replace('.', '_'), out_cluster_proj_norm)
np.save( 'in_cluster_proj_diff_norm_sheave_fml' + str(lamda).replace('.', '_')+ '_pout' + str(pout).replace('.', '_'), in_cluster_proj_diff_norm)
np.save('out_cluster_proj_diff_norm_sheave_fml' + str(lamda).replace('.', '_')+ '_pout' + str(pout).replace('.', '_'), out_cluster_proj_diff_norm)

In [None]:
# task_loss dictionary
for key, value in task_loss.items():
    # Convert the list to a NumPy array
    array_loss = np.array(value)
    
    # Save the NumPy array using the specified format
    np.save('training_loss_sheave_fml_task' + key + '_' + str(lamda).replace('.', '_') + '_pout' + str(pout).replace('.', '_'), array_loss)

# task_rel_error dictionary
for key, value in task_rel_error.items():
    # Convert the list to a NumPy array
    array_rel_error = np.array(value)
    
    # Save the NumPy array using the specified format
    np.save('relative_error_sheave_fml_task' + key + '_' + str(lamda).replace('.', '_') + '_pout' + str(pout).replace('.', '_'), array_rel_error)