In [1]:
from keras.utils import to_categorical
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Dataset
import copy
import matplotlib.pyplot as plt
from tqdm import tqdm
import networkx as nx
from torch.nn.utils import parameters_to_vector, vector_to_parameters
import networkx as nx

In [2]:
from sklearn.datasets import load_svmlight_file


def get_data(data):
    data = load_svmlight_file(data)
    return data[0].toarray(), data[1]
data = "./a1a.txt"
X, y = get_data(data)
y[y == -1] = 0
# Convert labels to one-hot encoding
y = to_categorical(y)

data = "./a1a_t"
X_test,y_test = get_data(data)
y_test[y_test == -1] = 0
y_test = to_categorical(y_test)

In [3]:
y[y[:, 0] == 1].shape
X.shape

(1605, 119)

In [4]:
# Dataset partitioning
def random_split(X, y, n, seed):
    """Equally split data between n agents"""
    rng = np.random.default_rng(seed)
    perm = rng.permutation(y.shape[0])
    X_split = np.array_split(X[perm], n)  #np.stack to keep as a np array
    y_split = np.array_split(y[perm], n)
    return X_split, y_split

In [5]:
no_users = 30

In [6]:
X, y = random_split(X, y, 3, 42)

In [7]:
X1 = X[0][:, 0:50]
X2 = X[1][:, 30: 90]
X3 = X[2][:, 20:]
y1 = y[0]
y2 = y[1]
y3 = y[2]
subset_ranges = [np.arange(0, 50), np.arange(30, 90), np.arange(20, 119)]
subset_lengths = [subset_ranges[0].shape[0], subset_ranges[1].shape[0], subset_ranges[2].shape[0]]

In [8]:
# Graph implementation
def generate_graph(cluster_sizes=[100,100], pin=0.5, pout=0.01, seed=0):
    """Generate a random connected graph"""
    probs = np.array([[pin, pout, 5 * pout],[pout, pin, 5 * pout],[5 * pout, 5 * pout, pin]])
    while True:
        g = nx.stochastic_block_model(cluster_sizes, probs, seed=0)
        if nx.algorithms.components.is_connected(g):
            return g


cluster_sizes = [10, 10, 10]
features_sizes = [8, 7, 6, 5]
pin = 0.5
pout = 0.1
seed = 0
alpha = 1e-2
lamda = 0#1e-2#1e-3
eta = 1e-2
d0 = min(subset_lengths)
no_users = sum(cluster_sizes)
batch_size = 10
epochs = 1
it = 2000
G = generate_graph(cluster_sizes, pin, pout, seed)

# Set a random seed for reproducibility
seed = 17
torch.manual_seed(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(seed)
#nx.draw(G, with_labels=True, node_size=100, alpha=1, linewidths=10)
#plt.show()

In [9]:
def degrees(A):
    """Return the degrees of each node of a graph from its adjacency matrix"""
    return np.sum(A, axis=0).reshape(A.shape[0], 1)

def node_degree(n, G):
    cnt = 0
    for i in G.neighbors(n):
        cnt += 1
    return cnt

def get_neighbors(n, G):
    neighbors_list = []
    for i in G.neighbors(n):
        neighbors_list.append(int(i))
    return neighbors_list

In [10]:
y1.shape

(535, 2)

In [11]:
datapoints = {}
count = 0

X1, y1 = random_split(X1, y1, 10, 42)
X2, y2 = random_split(X2, y2, 10, 42)
X3, y3 = random_split(X3, y3, 10, 42)

X_train = [X1, X2, X3]
y_train = [y1, y2, y3]
input_sizes = [X1[0].shape[1], X2[0].shape[1], X3[0].shape[1]]

for i, cluster_size in enumerate(cluster_sizes):
    for j in range(cluster_size):
        
        test_features = X_test[:, subset_ranges[i]]
        test_label = y_test
        datapoints[count] = {
                'features': X_train[i][j],
                'degree': node_degree(count, G),
                'label': y_train[i][j],
                'neighbors': get_neighbors(count, G),
                'input_size': X_train[i][j].shape[1],
                'test_features':test_features,
                'test_labels': test_label
            }
        count += 1

In [12]:
class MyDataset(Dataset):
    def __init__(self, data, targets, transform=None):
        self.data = torch.FloatTensor(data)
        self.targets = torch.FloatTensor(targets)
        
    def __getitem__(self, index):
        x = self.data[index]
        y = self.targets[index]

        return x, y
    
    def __len__(self):
        return len(self.data)


In [13]:

class MLP_Net(nn.Module):
    def __init__(self, input_size, num_classes, user_id):
        super(MLP_Net, self).__init__()
        self.fc1 = nn.Linear(input_size, num_classes, bias=True)
        self.user_id = user_id

    def forward(self, x):
        x = torch.flatten(x, 1)
        output = F.softmax(self.fc1(x), dim=1)  # Applying softmax along the second dimension
        return output

In [14]:
from typing import Iterable, Optional

def grads_to_vector(parameters: Iterable[torch.Tensor]) -> torch.Tensor:
    r"""Convert parameters to one vector

    Args:
        parameters (Iterable[Tensor]): an iterator of Tensors that are the
            parameters of a model.

    Returns:
        The parameters represented by a single vector
    """
    # Flag for the device where the parameter is located
    param_device = None

    vec = []
    for param in parameters:
        # Ensure the parameters are located in the same device
        param_device = param.grad

        vec.append(param_device.view(-1))
    return torch.cat(vec)

In [15]:
model = MLP_Net(datapoints[0]["input_size"], 2, user_id=0)

lr = 0.01

dataloader = DataLoader(MyDataset(datapoints[0]["features"], datapoints[0]["label"]), batch_size=20, shuffle=True)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
for i in range(1000):
    for j, (x, y) in zip(range(1), dataloader):
        criterion = nn.CrossEntropyLoss()
        optimizer.zero_grad()
        yhat = model(x)
        
        # Calculate accuracy
        _, predicted = torch.max(yhat, 1)
        _, predicted_true = torch.max(y, 1)
        correct_predictions = (predicted == predicted_true).sum().item()
         

        loss = criterion(yhat, y)
        
        loss.backward()
        print(i, loss.detach(), correct_predictions)
        #optimizer.step()
        new_model = parameters_to_vector(model.parameters()) - lr * grads_to_vector(model.parameters())
        vector_to_parameters(parameters=model.parameters(), vec=new_model)
        #if i % 50 ==0:
            #lr *= 0.9
            

#parameters_to_vector(model.parameters())

0 tensor(0.7243) 5
1 tensor(0.7168) 7
2 tensor(0.7072) 7
3 tensor(0.7037) 6
4 tensor(0.7381) 3
5 tensor(0.7233) 4
6 tensor(0.7124) 5
7 tensor(0.7149) 6
8 tensor(0.7011) 10
9 tensor(0.7044) 10
10 tensor(0.7113) 7
11 tensor(0.7167) 7
12 tensor(0.6863) 12
13 tensor(0.6950) 11
14 tensor(0.6973) 9
15 tensor(0.6853) 10
16 tensor(0.6938) 11
17 tensor(0.7039) 8
18 tensor(0.6882) 11
19 tensor(0.6918) 11
20 tensor(0.6919) 9
21 tensor(0.6995) 9
22 tensor(0.7058) 8
23 tensor(0.6918) 9
24 tensor(0.7008) 8
25 tensor(0.6934) 11
26 tensor(0.6974) 8
27 tensor(0.6901) 7
28 tensor(0.6792) 11
29 tensor(0.6794) 12
30 tensor(0.6853) 13
31 tensor(0.6725) 14
32 tensor(0.6809) 10
33 tensor(0.6903) 8
34 tensor(0.6936) 8
35 tensor(0.6877) 9
36 tensor(0.6837) 10
37 tensor(0.6899) 10
38 tensor(0.6755) 12
39 tensor(0.6671) 13
40 tensor(0.6928) 11
41 tensor(0.6868) 11
42 tensor(0.6961) 12
43 tensor(0.6730) 13
44 tensor(0.6791) 14
45 tensor(0.6768) 12
46 tensor(0.6698) 14
47 tensor(0.6851) 12
48 tensor(0.6689) 13
49 

391 tensor(0.5377) 16
392 tensor(0.5500) 15
393 tensor(0.5050) 17
394 tensor(0.5224) 17
395 tensor(0.5908) 13
396 tensor(0.5525) 16
397 tensor(0.5198) 17
398 tensor(0.5723) 15
399 tensor(0.5902) 14
400 tensor(0.5836) 14
401 tensor(0.5484) 15
402 tensor(0.5734) 14
403 tensor(0.5708) 15
404 tensor(0.5598) 15
405 tensor(0.6095) 13
406 tensor(0.5313) 16
407 tensor(0.5373) 17
408 tensor(0.5037) 17
409 tensor(0.5732) 14
410 tensor(0.5484) 15
411 tensor(0.5778) 14
412 tensor(0.5294) 16
413 tensor(0.4947) 18
414 tensor(0.5146) 16
415 tensor(0.4988) 17
416 tensor(0.5412) 16
417 tensor(0.5373) 16
418 tensor(0.5748) 14
419 tensor(0.5267) 16
420 tensor(0.5465) 16
421 tensor(0.5716) 14
422 tensor(0.5905) 14
423 tensor(0.6338) 12
424 tensor(0.5382) 16
425 tensor(0.5274) 17
426 tensor(0.5303) 16
427 tensor(0.5688) 15
428 tensor(0.5344) 16
429 tensor(0.5711) 14
430 tensor(0.5696) 14
431 tensor(0.5351) 16
432 tensor(0.5081) 16
433 tensor(0.5489) 15
434 tensor(0.4854) 18
435 tensor(0.5748) 14
436 tensor

770 tensor(0.5260) 16
771 tensor(0.4780) 18
772 tensor(0.4400) 19
773 tensor(0.4815) 17
774 tensor(0.4662) 18
775 tensor(0.5341) 16
776 tensor(0.5848) 13
777 tensor(0.5006) 16
778 tensor(0.6048) 13
779 tensor(0.4854) 17
780 tensor(0.4620) 18
781 tensor(0.5233) 16
782 tensor(0.5625) 14
783 tensor(0.5341) 15
784 tensor(0.5106) 16
785 tensor(0.4993) 17
786 tensor(0.5556) 14
787 tensor(0.5120) 16
788 tensor(0.5392) 16
789 tensor(0.4451) 19
790 tensor(0.4800) 18
791 tensor(0.5396) 15
792 tensor(0.4685) 17
793 tensor(0.5830) 13
794 tensor(0.5429) 15
795 tensor(0.5179) 17
796 tensor(0.5785) 13
797 tensor(0.4983) 17
798 tensor(0.5087) 16
799 tensor(0.5291) 16
800 tensor(0.5090) 16
801 tensor(0.5917) 13
802 tensor(0.5725) 14
803 tensor(0.5918) 13
804 tensor(0.5259) 15
805 tensor(0.5196) 15
806 tensor(0.5194) 16
807 tensor(0.5142) 16
808 tensor(0.5253) 16
809 tensor(0.5591) 14
810 tensor(0.5639) 15
811 tensor(0.5088) 16
812 tensor(0.5214) 16
813 tensor(0.5481) 15
814 tensor(0.5055) 15
815 tensor

In [16]:
class ClientUpdate(object):
    def __init__(self, dataset, batchSize, alpha, lamda, epochs, projection_list, projected_weights):
        self.train_loader = DataLoader(MyDataset(dataset["features"], dataset["label"]), batch_size=batchSize, shuffle=True)
        #self.learning_rate = learning_rate
        self.epochs = epochs
        self.batchSize = batchSize

    def train(self, model):
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, momentum=0.5)

        e_loss = []
        for epoch in range(1, self.epochs+1):
            train_loss = 0
            model.train()
            for i, (data, labels) in zip(range(1), self.train_loader):
                data, labels = data, labels
                optimizer.zero_grad() 
                output = model(data)  
                loss = criterion(output, labels)
                #loss += mu/2 * torch.norm(client_param.data - server_param.data)**2
                loss.backward()
                grads = grads_to_vector(model.parameters())
                #optimizer.step()
                train_loss += loss.item()*data.size(0)
                weights = parameters_to_vector(model.parameters())
                mat_vec_sum = torch.zeros_like(weights)
                for j in G.neighbors(model.user_id):
                    mat_vec_sum = torch.add(mat_vec_sum, torch.matmul(torch.transpose(projection_list[model.user_id][j], 0, 1), 
                                                         projected_weights[j][model.user_id] - projected_weights[model.user_id][j]))
                
                model_update = parameters_to_vector(model.parameters()) - alpha * (grads + lamda * mat_vec_sum)
                
            vector_to_parameters(parameters=model.parameters(), vec=model_update)
                

            train_loss = train_loss/self.batchSize#len(self.train_loader.dataset) 
            e_loss.append(train_loss)

        total_loss = e_loss#sum(e_loss)/len(e_loss)

        return model.state_dict(), total_loss

In [17]:
# Preparing projection matrices
models = [MLP_Net(input_size=datapoints[i]['input_size'], num_classes=2, user_id=i) for i in range(no_users)]
#temp = MLP_Net()
projection_list = []
projected_weights = []

def update_ProjWeight(projection_list, projected_weights, models, first_run=True):
    for i in range(no_users):
        neighbors_mat = []
        neighbors_weights = []
        for j in range(no_users):
            if j in G.neighbors(i):
                with torch.no_grad():
                    if first_run == True:
                        # Specify the dimensions of the rectangular matrix
                        row, column = d0, parameters_to_vector(models[i].parameters()).size()[0]

                        # Generate random values for the diagonal from a normal distribution
                        diag_values = 1.0 + 1.0 * torch.randn(min(row, column))

                        # Create a rectangular matrix with diagonal elements
                        mat = torch.diag(diag_values)

                        # If the matrix is larger than the diagonal vector, fill the remaining elements with zeros
                       
                        mat = torch.cat((mat, torch.zeros(row, column - row)), dim=1)

                        

                        # Append the matrix to the list
                        neighbors_mat.append(mat)
                        neighbors_weights.append(torch.matmul(mat, parameters_to_vector(models[i].parameters())))
                    else:
                        neighbors_weights.append(torch.matmul(projection_list[i][j], parameters_to_vector(models[i].parameters())))
            else:
                neighbors_mat.append(0)
                neighbors_weights.append(0)
        if first_run == True:
            projection_list.append(neighbors_mat)
        projected_weights.append(neighbors_weights)

update_ProjWeight(projection_list, projected_weights, models)



In [18]:
def testing(model, dataset, bs, criterion): 
    test_loss = 0
    correct = 0
    test_loader = DataLoader(MyDataset(dataset["test_features"], dataset["test_labels"]), batch_size=bs)
    l = len(test_loader)
    model.eval()
    for data, labels in test_loader:
        data, labels = data, labels
        output = model(data)
        loss = criterion(output, labels)
        test_loss += loss.item()*data.size(0)
        _, pred = torch.max(output, 1)
        _, predicted_true = torch.max(labels, 1)
        correct += pred.eq(predicted_true.data.view_as(pred)).sum().item()
    
    test_loss /= len(test_loader.dataset)
    correct /= len(test_loader.dataset)
    
    return test_loss, correct

In [19]:
#global_model = CNN_Net().cuda()
models = [MLP_Net(input_size=datapoints[i]['input_size'], num_classes=2, user_id=i) for i in range(no_users)]
dummy_models = [MLP_Net(input_size=datapoints[i]['input_size'], num_classes=2, user_id=i) for i in range(no_users)]

#model.load_state_dict(global_model.state_dict())

criterion = nn.CrossEntropyLoss()


train_loss = []
test_loss = []
test_accuracy = []
total_rel_error = []

in_cluster_proj_norm = []
out_cluster_proj_norm = []
in_cluster_proj_diff_norm = []
out_cluster_proj_diff_norm = []
task_loss = {'0':[],
                '1':[],
                '2':[]}
task_rel_error = {'0':[],
            '1':[],
            '2':[]}

for curr_round in tqdm(range(1, it+1)):
    w, local_loss = [], []

    
    for i in range(no_users):
        dummy_models[i].load_state_dict(models[i].state_dict())
        local_update = ClientUpdate(dataset=datapoints[i], batchSize=batch_size, alpha=alpha, lamda=lamda, epochs=1, projection_list=projection_list, projected_weights=projected_weights)
        weights, loss = local_update.train(dummy_models[i])
        w.append(weights)
        local_loss.append(loss)
        models[i].load_state_dict(w[i])
        
    
    
    # Update prjection matrix
    projected_weights = []
    update_ProjWeight(projection_list, projected_weights, models, first_run=False)
    
    #print(projection_list[0], projected_weights[0])
    
    for i in range(no_users):
        weights = parameters_to_vector(models[i].parameters())
        for j in G.neighbors(i):
            temp_mat = torch.outer(projected_weights[i][j] - projected_weights[j][i], weights).clone()
            projection_list[i][j] = torch.add(projection_list[i][j], -1 * eta * lamda * temp_mat)
    
    in_cluster_proj_norm_round = 0
    out_cluster_proj_norm_round = 0
    in_cluster_proj_diff_round = 0
    out_cluster_proj_diff_round = 0
    in_edges = 0
    out_edges = 0
    
    for i in range(no_users//3):
        for j in G.neighbors(i):
            if j < no_users//3:
                in_edges += 1
                in_cluster_proj_norm_round += torch.norm(projection_list[i][j]).detach().numpy()
                in_cluster_proj_diff_round += torch.norm(projected_weights[i][j] - projected_weights[j][i]).detach().numpy()
            else:
                out_edges += 1
                out_cluster_proj_norm_round += torch.norm(projection_list[i][j]).detach().numpy()
                out_cluster_proj_diff_round += torch.norm(projected_weights[i][j] - projected_weights[j][i]).detach().numpy()
    in_cluster_proj_norm.append(in_cluster_proj_norm_round / in_edges)
    out_cluster_proj_norm.append(out_cluster_proj_norm_round / out_edges)
    in_cluster_proj_diff_norm.append(in_cluster_proj_diff_round / in_edges)
    out_cluster_proj_diff_norm.append(out_cluster_proj_diff_round / out_edges)
            
                                         
                                              
    
        
        
        
    
    




          
            

    local_test_acc = []
    local_test_loss = []
    user_rel_error = 0
    per_task_loss = []
    per_task_rel_error = []
    
    for k in range(no_users):
      
        g_loss, acc = testing(models[k], datapoints[k], 50, criterion)
        local_test_loss.append(g_loss)
        user_rel_error += acc#rel_error(models[i])
        if (k + 1) % 10 == 0:
            task_loss[str(k // 10)].append(sum(per_task_loss) / 10)
            task_rel_error[str(k // 10)].append(sum(per_task_rel_error) / 10)
            per_task_loss = []
            per_task_rel_error = []
        per_task_loss.append(g_loss)
        per_task_rel_error.append(acc)#rel_error(models[i]))
    
    
        

    g_loss = sum(local_test_loss) / len(local_test_loss)
    total_rel_error.append(user_rel_error / no_users)
    
    

    test_loss.append(g_loss)
    #test_accuracy.append(g_accuracy)
    print("Training_loss %2.5f,   Accuracy %2.5f"% (test_loss[-1], total_rel_error[-1]))

  0%|          | 1/2000 [00:18<10:04:17, 18.14s/it]

Training_loss 0.69400,   Accuracy 0.49996


  0%|          | 2/2000 [00:39<11:08:25, 20.07s/it]

Training_loss 0.69253,   Accuracy 0.50915


  0%|          | 3/2000 [00:57<10:32:41, 19.01s/it]

Training_loss 0.69073,   Accuracy 0.51974


  0%|          | 4/2000 [01:21<11:38:46, 21.01s/it]

Training_loss 0.68923,   Accuracy 0.52937


  0%|          | 5/2000 [01:44<12:06:59, 21.86s/it]

Training_loss 0.68765,   Accuracy 0.53775


  0%|          | 6/2000 [02:02<11:23:26, 20.57s/it]

Training_loss 0.68587,   Accuracy 0.54904


  0%|          | 7/2000 [02:27<12:04:07, 21.80s/it]

Training_loss 0.68437,   Accuracy 0.55766


  0%|          | 8/2000 [02:47<11:52:41, 21.47s/it]

Training_loss 0.68289,   Accuracy 0.56667


  0%|          | 9/2000 [03:07<11:31:55, 20.85s/it]

Training_loss 0.68125,   Accuracy 0.57633


  0%|          | 10/2000 [03:29<11:47:34, 21.33s/it]

Training_loss 0.67976,   Accuracy 0.58677


  1%|          | 11/2000 [03:53<12:12:53, 22.11s/it]

Training_loss 0.67818,   Accuracy 0.59654


  1%|          | 12/2000 [04:10<11:23:56, 20.64s/it]

Training_loss 0.67671,   Accuracy 0.60515


  1%|          | 13/2000 [04:26<10:35:54, 19.20s/it]

Training_loss 0.67523,   Accuracy 0.61253


  1%|          | 14/2000 [04:45<10:27:16, 18.95s/it]

Training_loss 0.67355,   Accuracy 0.62111


  1%|          | 15/2000 [05:03<10:19:39, 18.73s/it]

Training_loss 0.67200,   Accuracy 0.62912


  1%|          | 16/2000 [05:19<9:55:10, 18.00s/it] 

Training_loss 0.67087,   Accuracy 0.63474


  1%|          | 17/2000 [05:38<10:03:14, 18.25s/it]

Training_loss 0.66952,   Accuracy 0.64123


  1%|          | 18/2000 [05:52<9:19:12, 16.93s/it] 

Training_loss 0.66787,   Accuracy 0.65026


  1%|          | 19/2000 [06:05<8:45:26, 15.91s/it]

Training_loss 0.66661,   Accuracy 0.65668


  1%|          | 20/2000 [06:26<9:28:48, 17.24s/it]

Training_loss 0.66523,   Accuracy 0.66327


  1%|          | 21/2000 [06:44<9:36:29, 17.48s/it]

Training_loss 0.66398,   Accuracy 0.66910


  1%|          | 22/2000 [07:01<9:33:10, 17.39s/it]

Training_loss 0.66256,   Accuracy 0.67546


  1%|          | 23/2000 [07:18<9:27:15, 17.22s/it]

Training_loss 0.66118,   Accuracy 0.68162


  1%|          | 24/2000 [07:36<9:35:25, 17.47s/it]

Training_loss 0.66011,   Accuracy 0.68576


  1%|▏         | 25/2000 [07:53<9:32:42, 17.40s/it]

Training_loss 0.65894,   Accuracy 0.69057


  1%|▏         | 26/2000 [08:14<10:04:53, 18.39s/it]

Training_loss 0.65791,   Accuracy 0.69404


  1%|▏         | 27/2000 [08:34<10:24:23, 18.99s/it]

Training_loss 0.65646,   Accuracy 0.69975


  1%|▏         | 28/2000 [08:59<11:18:58, 20.66s/it]

Training_loss 0.65520,   Accuracy 0.70266


  1%|▏         | 29/2000 [09:22<11:41:10, 21.34s/it]

Training_loss 0.65427,   Accuracy 0.70460


  2%|▏         | 30/2000 [09:42<11:33:33, 21.12s/it]

Training_loss 0.65308,   Accuracy 0.70799


  2%|▏         | 31/2000 [10:00<11:03:47, 20.23s/it]

Training_loss 0.65201,   Accuracy 0.71056


  2%|▏         | 32/2000 [10:25<11:46:44, 21.55s/it]

Training_loss 0.65076,   Accuracy 0.71438


  2%|▏         | 33/2000 [10:47<11:48:55, 21.62s/it]

Training_loss 0.64979,   Accuracy 0.71617


  2%|▏         | 34/2000 [11:07<11:35:53, 21.24s/it]

Training_loss 0.64887,   Accuracy 0.71840


  2%|▏         | 35/2000 [11:25<10:59:14, 20.13s/it]

Training_loss 0.64770,   Accuracy 0.72163


  2%|▏         | 36/2000 [11:41<10:22:21, 19.01s/it]

Training_loss 0.64656,   Accuracy 0.72384


  2%|▏         | 37/2000 [12:02<10:41:53, 19.62s/it]

Training_loss 0.64557,   Accuracy 0.72598


  2%|▏         | 38/2000 [12:26<11:27:02, 21.01s/it]

Training_loss 0.64456,   Accuracy 0.72759


  2%|▏         | 39/2000 [12:50<11:51:59, 21.78s/it]

Training_loss 0.64364,   Accuracy 0.72941


  2%|▏         | 40/2000 [13:07<11:06:31, 20.40s/it]

Training_loss 0.64251,   Accuracy 0.73140


  2%|▏         | 41/2000 [13:31<11:39:09, 21.41s/it]

Training_loss 0.64147,   Accuracy 0.73308


  2%|▏         | 42/2000 [13:57<12:19:31, 22.66s/it]

Training_loss 0.64062,   Accuracy 0.73439


  2%|▏         | 43/2000 [14:18<12:04:13, 22.20s/it]

Training_loss 0.63970,   Accuracy 0.73572


  2%|▏         | 44/2000 [14:36<11:27:08, 21.08s/it]

Training_loss 0.63850,   Accuracy 0.73679


  2%|▏         | 45/2000 [15:07<13:00:50, 23.96s/it]

Training_loss 0.63766,   Accuracy 0.73802


  2%|▏         | 46/2000 [15:33<13:17:14, 24.48s/it]

Training_loss 0.63665,   Accuracy 0.73914


  2%|▏         | 47/2000 [15:57<13:19:57, 24.58s/it]

Training_loss 0.63590,   Accuracy 0.73983


  2%|▏         | 48/2000 [16:19<12:55:41, 23.84s/it]

Training_loss 0.63480,   Accuracy 0.74063


  2%|▏         | 49/2000 [16:41<12:32:11, 23.13s/it]

Training_loss 0.63394,   Accuracy 0.74130


  2%|▎         | 50/2000 [16:55<11:06:13, 20.50s/it]

Training_loss 0.63310,   Accuracy 0.74229


  3%|▎         | 51/2000 [17:20<11:49:11, 21.83s/it]

Training_loss 0.63226,   Accuracy 0.74304


  3%|▎         | 52/2000 [17:47<12:33:07, 23.20s/it]

Training_loss 0.63161,   Accuracy 0.74342


  3%|▎         | 53/2000 [18:09<12:21:14, 22.84s/it]

Training_loss 0.63081,   Accuracy 0.74398


  3%|▎         | 54/2000 [18:30<12:06:50, 22.41s/it]

Training_loss 0.62996,   Accuracy 0.74458


  3%|▎         | 55/2000 [18:51<11:49:38, 21.89s/it]

Training_loss 0.62933,   Accuracy 0.74508


  3%|▎         | 56/2000 [19:10<11:21:34, 21.04s/it]

Training_loss 0.62845,   Accuracy 0.74550


  3%|▎         | 57/2000 [19:43<13:16:00, 24.58s/it]

Training_loss 0.62763,   Accuracy 0.74578


  3%|▎         | 58/2000 [20:08<13:26:54, 24.93s/it]

Training_loss 0.62679,   Accuracy 0.74657


  3%|▎         | 59/2000 [20:29<12:48:36, 23.76s/it]

Training_loss 0.62603,   Accuracy 0.74706


  3%|▎         | 60/2000 [20:51<12:26:27, 23.09s/it]

Training_loss 0.62525,   Accuracy 0.74733


  3%|▎         | 61/2000 [21:12<12:04:04, 22.41s/it]

Training_loss 0.62452,   Accuracy 0.74796


  3%|▎         | 62/2000 [21:33<11:48:27, 21.93s/it]

Training_loss 0.62393,   Accuracy 0.74841


  3%|▎         | 63/2000 [21:51<11:18:00, 21.00s/it]

Training_loss 0.62317,   Accuracy 0.74902


  3%|▎         | 64/2000 [22:14<11:28:19, 21.33s/it]

Training_loss 0.62244,   Accuracy 0.74921


  3%|▎         | 65/2000 [22:31<10:46:40, 20.05s/it]

Training_loss 0.62167,   Accuracy 0.74958


  3%|▎         | 66/2000 [22:48<10:17:02, 19.14s/it]

Training_loss 0.62091,   Accuracy 0.74982


  3%|▎         | 67/2000 [23:05<9:57:27, 18.55s/it] 

Training_loss 0.62037,   Accuracy 0.75003


  3%|▎         | 68/2000 [23:25<10:15:14, 19.11s/it]

Training_loss 0.61974,   Accuracy 0.75038


  3%|▎         | 69/2000 [23:43<10:06:12, 18.84s/it]

Training_loss 0.61905,   Accuracy 0.75067


  4%|▎         | 70/2000 [24:06<10:41:24, 19.94s/it]

Training_loss 0.61828,   Accuracy 0.75098


  4%|▎         | 71/2000 [24:26<10:44:20, 20.04s/it]

Training_loss 0.61763,   Accuracy 0.75128


  4%|▎         | 72/2000 [24:46<10:41:28, 19.96s/it]

Training_loss 0.61698,   Accuracy 0.75153


  4%|▎         | 73/2000 [25:14<12:03:50, 22.54s/it]

Training_loss 0.61642,   Accuracy 0.75202


  4%|▎         | 74/2000 [25:35<11:43:02, 21.90s/it]

Training_loss 0.61582,   Accuracy 0.75208


  4%|▍         | 75/2000 [25:52<10:57:10, 20.48s/it]

Training_loss 0.61522,   Accuracy 0.75226


  4%|▍         | 76/2000 [26:12<10:52:48, 20.36s/it]

Training_loss 0.61467,   Accuracy 0.75256


  4%|▍         | 77/2000 [26:34<11:06:11, 20.79s/it]

Training_loss 0.61415,   Accuracy 0.75268


  4%|▍         | 78/2000 [26:56<11:22:50, 21.32s/it]

Training_loss 0.61355,   Accuracy 0.75316


  4%|▍         | 79/2000 [27:18<11:24:07, 21.37s/it]

Training_loss 0.61303,   Accuracy 0.75342


  4%|▍         | 80/2000 [27:36<10:55:24, 20.48s/it]

Training_loss 0.61241,   Accuracy 0.75369


  4%|▍         | 81/2000 [27:57<10:54:30, 20.46s/it]

Training_loss 0.61195,   Accuracy 0.75394


  4%|▍         | 82/2000 [28:15<10:34:55, 19.86s/it]

Training_loss 0.61142,   Accuracy 0.75420


  4%|▍         | 83/2000 [28:35<10:34:21, 19.85s/it]

Training_loss 0.61079,   Accuracy 0.75436


  4%|▍         | 84/2000 [29:29<15:59:54, 30.06s/it]

Training_loss 0.61018,   Accuracy 0.75459


  4%|▍         | 85/2000 [29:45<13:42:38, 25.77s/it]

Training_loss 0.60957,   Accuracy 0.75484


  4%|▍         | 86/2000 [30:00<12:00:41, 22.59s/it]

Training_loss 0.60907,   Accuracy 0.75504


  4%|▍         | 87/2000 [30:16<10:55:58, 20.57s/it]

Training_loss 0.60852,   Accuracy 0.75541


  4%|▍         | 88/2000 [30:30<9:53:51, 18.64s/it] 

Training_loss 0.60800,   Accuracy 0.75557


  4%|▍         | 89/2000 [30:52<10:28:32, 19.73s/it]

Training_loss 0.60744,   Accuracy 0.75570


  4%|▍         | 90/2000 [32:52<26:22:01, 49.70s/it]

Training_loss 0.60692,   Accuracy 0.75582


  5%|▍         | 91/2000 [34:50<37:11:58, 70.15s/it]

Training_loss 0.60637,   Accuracy 0.75608


  5%|▍         | 92/2000 [36:53<45:38:14, 86.11s/it]

Training_loss 0.60593,   Accuracy 0.75618


  5%|▍         | 93/2000 [38:50<50:34:07, 95.46s/it]

Training_loss 0.60547,   Accuracy 0.75643


  5%|▍         | 94/2000 [40:46<53:45:04, 101.52s/it]

Training_loss 0.60509,   Accuracy 0.75657


  5%|▍         | 95/2000 [43:12<60:45:20, 114.81s/it]

Training_loss 0.60462,   Accuracy 0.75665


  5%|▍         | 96/2000 [45:28<64:11:17, 121.36s/it]

Training_loss 0.60416,   Accuracy 0.75678


  5%|▍         | 97/2000 [47:41<65:58:43, 124.82s/it]

Training_loss 0.60371,   Accuracy 0.75686


  5%|▍         | 98/2000 [49:43<65:28:41, 123.93s/it]

Training_loss 0.60329,   Accuracy 0.75694


  5%|▍         | 99/2000 [51:38<63:58:50, 121.16s/it]

Training_loss 0.60282,   Accuracy 0.75710


  5%|▌         | 100/2000 [53:35<63:15:22, 119.85s/it]

Training_loss 0.60239,   Accuracy 0.75731


  5%|▌         | 101/2000 [55:59<67:06:40, 127.23s/it]

Training_loss 0.60193,   Accuracy 0.75737


  5%|▌         | 102/2000 [58:05<66:49:49, 126.76s/it]

Training_loss 0.60156,   Accuracy 0.75747


  5%|▌         | 103/2000 [1:00:12<66:53:25, 126.94s/it]

Training_loss 0.60120,   Accuracy 0.75768


  5%|▌         | 104/2000 [1:02:11<65:36:35, 124.58s/it]

Training_loss 0.60085,   Accuracy 0.75775


  5%|▌         | 105/2000 [1:03:59<62:59:29, 119.67s/it]

Training_loss 0.60038,   Accuracy 0.75785


  5%|▌         | 106/2000 [1:05:45<60:44:03, 115.44s/it]

Training_loss 0.60002,   Accuracy 0.75799


  5%|▌         | 107/2000 [1:07:30<59:08:05, 112.46s/it]

Training_loss 0.59967,   Accuracy 0.75802


  5%|▌         | 108/2000 [1:09:15<57:55:33, 110.22s/it]

Training_loss 0.59924,   Accuracy 0.75810


  5%|▌         | 109/2000 [1:11:00<56:56:51, 108.41s/it]

Training_loss 0.59888,   Accuracy 0.75819


  6%|▌         | 110/2000 [1:12:45<56:23:35, 107.42s/it]

Training_loss 0.59851,   Accuracy 0.75832


  6%|▌         | 111/2000 [1:14:32<56:21:30, 107.41s/it]

Training_loss 0.59815,   Accuracy 0.75837


  6%|▌         | 112/2000 [1:16:23<56:56:22, 108.57s/it]

Training_loss 0.59774,   Accuracy 0.75841


  6%|▌         | 113/2000 [1:18:12<56:55:49, 108.61s/it]

Training_loss 0.59747,   Accuracy 0.75851


  6%|▌         | 114/2000 [1:20:02<57:08:17, 109.07s/it]

Training_loss 0.59704,   Accuracy 0.75850


  6%|▌         | 115/2000 [1:21:56<57:55:01, 110.61s/it]

Training_loss 0.59669,   Accuracy 0.75853


  6%|▌         | 116/2000 [1:23:50<58:16:52, 111.37s/it]

Training_loss 0.59642,   Accuracy 0.75855


  6%|▌         | 117/2000 [1:25:45<58:55:14, 112.65s/it]

Training_loss 0.59612,   Accuracy 0.75871


  6%|▌         | 118/2000 [1:27:32<57:54:35, 110.77s/it]

Training_loss 0.59568,   Accuracy 0.75877


  6%|▌         | 119/2000 [1:29:18<57:12:11, 109.48s/it]

Training_loss 0.59535,   Accuracy 0.75883


  6%|▌         | 120/2000 [1:31:09<57:19:17, 109.76s/it]

Training_loss 0.59511,   Accuracy 0.75890


  6%|▌         | 121/2000 [1:32:52<56:18:59, 107.90s/it]

Training_loss 0.59481,   Accuracy 0.75889


  6%|▌         | 122/2000 [1:34:39<56:09:41, 107.66s/it]

Training_loss 0.59452,   Accuracy 0.75886


  6%|▌         | 123/2000 [1:36:25<55:46:52, 106.99s/it]

Training_loss 0.59424,   Accuracy 0.75883


  6%|▌         | 124/2000 [1:38:12<55:47:57, 107.08s/it]

Training_loss 0.59389,   Accuracy 0.75889


  6%|▋         | 125/2000 [1:39:57<55:28:00, 106.50s/it]

Training_loss 0.59362,   Accuracy 0.75892


  6%|▋         | 126/2000 [1:41:49<56:13:51, 108.02s/it]

Training_loss 0.59333,   Accuracy 0.75899


  6%|▋         | 127/2000 [1:43:46<57:39:49, 110.83s/it]

Training_loss 0.59295,   Accuracy 0.75904


  6%|▋         | 128/2000 [1:45:48<59:26:48, 114.32s/it]

Training_loss 0.59265,   Accuracy 0.75914


  6%|▋         | 129/2000 [1:47:38<58:36:37, 112.77s/it]

Training_loss 0.59238,   Accuracy 0.75928


  6%|▋         | 130/2000 [1:49:27<58:02:05, 111.72s/it]

Training_loss 0.59209,   Accuracy 0.75936


  7%|▋         | 131/2000 [1:51:14<57:13:03, 110.21s/it]

Training_loss 0.59179,   Accuracy 0.75942


  7%|▋         | 132/2000 [1:53:00<56:31:50, 108.95s/it]

Training_loss 0.59151,   Accuracy 0.75948


  7%|▋         | 133/2000 [1:54:49<56:33:58, 109.07s/it]

Training_loss 0.59125,   Accuracy 0.75951


  7%|▋         | 134/2000 [1:56:34<55:50:54, 107.75s/it]

Training_loss 0.59092,   Accuracy 0.75950


  7%|▋         | 135/2000 [1:58:21<55:42:08, 107.52s/it]

Training_loss 0.59064,   Accuracy 0.75942


  7%|▋         | 136/2000 [2:00:08<55:41:20, 107.55s/it]

Training_loss 0.59031,   Accuracy 0.75944


  7%|▋         | 137/2000 [2:01:56<55:37:06, 107.48s/it]

Training_loss 0.59001,   Accuracy 0.75948


  7%|▋         | 138/2000 [2:03:42<55:25:00, 107.14s/it]

Training_loss 0.58978,   Accuracy 0.75951


  7%|▋         | 139/2000 [2:05:30<55:31:52, 107.42s/it]

Training_loss 0.58955,   Accuracy 0.75953


  7%|▋         | 140/2000 [2:07:20<55:54:45, 108.22s/it]

Training_loss 0.58927,   Accuracy 0.75953


  7%|▋         | 141/2000 [2:09:04<55:10:53, 106.86s/it]

Training_loss 0.58911,   Accuracy 0.75958


  7%|▋         | 142/2000 [2:10:50<55:04:40, 106.72s/it]

Training_loss 0.58890,   Accuracy 0.75961


  7%|▋         | 143/2000 [2:12:41<55:42:36, 108.00s/it]

Training_loss 0.58857,   Accuracy 0.75961


  7%|▋         | 144/2000 [2:14:25<55:02:07, 106.75s/it]

Training_loss 0.58838,   Accuracy 0.75968


  7%|▋         | 145/2000 [2:16:13<55:12:06, 107.13s/it]

Training_loss 0.58809,   Accuracy 0.75969


  7%|▋         | 146/2000 [2:18:02<55:23:39, 107.56s/it]

Training_loss 0.58786,   Accuracy 0.75968


  7%|▋         | 147/2000 [2:19:54<56:05:05, 108.96s/it]

Training_loss 0.58756,   Accuracy 0.75974


  7%|▋         | 148/2000 [2:21:38<55:20:54, 107.59s/it]

Training_loss 0.58726,   Accuracy 0.75973


  7%|▋         | 149/2000 [2:23:23<54:58:24, 106.92s/it]

Training_loss 0.58704,   Accuracy 0.75975


  8%|▊         | 150/2000 [2:25:10<54:56:58, 106.93s/it]

Training_loss 0.58672,   Accuracy 0.75970


  8%|▊         | 151/2000 [2:26:59<55:05:37, 107.27s/it]

Training_loss 0.58646,   Accuracy 0.75970


  8%|▊         | 152/2000 [2:28:48<55:22:54, 107.89s/it]

Training_loss 0.58630,   Accuracy 0.75971


  8%|▊         | 153/2000 [2:30:43<56:27:41, 110.05s/it]

Training_loss 0.58609,   Accuracy 0.75975


  8%|▊         | 154/2000 [2:32:29<55:45:50, 108.75s/it]

Training_loss 0.58586,   Accuracy 0.75978


  8%|▊         | 155/2000 [2:34:17<55:35:53, 108.48s/it]

Training_loss 0.58562,   Accuracy 0.75975


  8%|▊         | 156/2000 [2:36:01<55:00:39, 107.40s/it]

Training_loss 0.58546,   Accuracy 0.75980


  8%|▊         | 157/2000 [2:37:47<54:41:37, 106.84s/it]

Training_loss 0.58527,   Accuracy 0.75982


  8%|▊         | 158/2000 [2:39:31<54:12:52, 105.96s/it]

Training_loss 0.58507,   Accuracy 0.75983


  8%|▊         | 159/2000 [2:41:20<54:38:56, 106.86s/it]

Training_loss 0.58487,   Accuracy 0.75986


  8%|▊         | 160/2000 [2:43:18<56:19:04, 110.19s/it]

Training_loss 0.58465,   Accuracy 0.76006


  8%|▊         | 161/2000 [2:45:22<58:27:34, 114.44s/it]

Training_loss 0.58438,   Accuracy 0.75997


  8%|▊         | 162/2000 [2:47:16<58:18:44, 114.21s/it]

Training_loss 0.58418,   Accuracy 0.75996


  8%|▊         | 163/2000 [2:49:04<57:18:57, 112.32s/it]

Training_loss 0.58396,   Accuracy 0.76003


  8%|▊         | 164/2000 [2:51:02<58:11:53, 114.11s/it]

Training_loss 0.58369,   Accuracy 0.76017


  8%|▊         | 165/2000 [2:52:56<58:12:08, 114.18s/it]

Training_loss 0.58347,   Accuracy 0.76015


  8%|▊         | 166/2000 [2:54:59<59:30:59, 116.83s/it]

Training_loss 0.58330,   Accuracy 0.76018


  8%|▊         | 167/2000 [2:56:55<59:14:54, 116.36s/it]

Training_loss 0.58303,   Accuracy 0.76024


  8%|▊         | 168/2000 [2:58:42<57:51:42, 113.70s/it]

Training_loss 0.58284,   Accuracy 0.76032


  8%|▊         | 169/2000 [3:00:35<57:40:40, 113.40s/it]

Training_loss 0.58267,   Accuracy 0.76031


  8%|▊         | 170/2000 [3:02:25<57:07:40, 112.38s/it]

Training_loss 0.58252,   Accuracy 0.76041


  9%|▊         | 171/2000 [3:04:10<55:58:05, 110.16s/it]

Training_loss 0.58233,   Accuracy 0.76046


  9%|▊         | 172/2000 [3:05:55<55:12:01, 108.71s/it]

Training_loss 0.58217,   Accuracy 0.76048


  9%|▊         | 173/2000 [3:07:45<55:19:38, 109.02s/it]

Training_loss 0.58194,   Accuracy 0.76047


  9%|▊         | 174/2000 [3:09:29<54:33:28, 107.56s/it]

Training_loss 0.58176,   Accuracy 0.76048


  9%|▉         | 175/2000 [3:11:14<54:05:36, 106.70s/it]

Training_loss 0.58156,   Accuracy 0.76052


  9%|▉         | 176/2000 [3:13:00<53:55:46, 106.44s/it]

Training_loss 0.58134,   Accuracy 0.76050


  9%|▉         | 177/2000 [3:14:45<53:48:53, 106.27s/it]

Training_loss 0.58115,   Accuracy 0.76055


  9%|▉         | 178/2000 [3:16:36<54:23:03, 107.46s/it]

Training_loss 0.58099,   Accuracy 0.76056


  9%|▉         | 179/2000 [3:18:23<54:17:30, 107.33s/it]

Training_loss 0.58080,   Accuracy 0.76051


  9%|▉         | 180/2000 [3:20:11<54:25:14, 107.65s/it]

Training_loss 0.58064,   Accuracy 0.76058


  9%|▉         | 181/2000 [3:21:56<54:00:56, 106.90s/it]

Training_loss 0.58045,   Accuracy 0.76056


  9%|▉         | 182/2000 [3:23:42<53:45:31, 106.45s/it]

Training_loss 0.58026,   Accuracy 0.76061


  9%|▉         | 183/2000 [3:25:36<54:51:22, 108.69s/it]

Training_loss 0.58008,   Accuracy 0.76067


  9%|▉         | 184/2000 [3:27:22<54:30:27, 108.05s/it]

Training_loss 0.57989,   Accuracy 0.76065


  9%|▉         | 185/2000 [3:29:10<54:28:13, 108.04s/it]

Training_loss 0.57976,   Accuracy 0.76072


  9%|▉         | 186/2000 [3:30:57<54:18:53, 107.79s/it]

Training_loss 0.57959,   Accuracy 0.76068


  9%|▉         | 187/2000 [3:32:43<53:58:24, 107.17s/it]

Training_loss 0.57945,   Accuracy 0.76075


  9%|▉         | 188/2000 [3:34:30<53:51:52, 107.02s/it]

Training_loss 0.57923,   Accuracy 0.76072


  9%|▉         | 189/2000 [3:36:17<53:52:27, 107.09s/it]

Training_loss 0.57905,   Accuracy 0.76079


 10%|▉         | 190/2000 [3:38:09<54:32:40, 108.49s/it]

Training_loss 0.57879,   Accuracy 0.76082


 10%|▉         | 191/2000 [3:39:55<54:11:13, 107.83s/it]

Training_loss 0.57863,   Accuracy 0.76078


 10%|▉         | 192/2000 [3:41:48<54:57:10, 109.42s/it]

Training_loss 0.57845,   Accuracy 0.76083


 10%|▉         | 193/2000 [3:43:49<56:39:08, 112.87s/it]

Training_loss 0.57827,   Accuracy 0.76083


 10%|▉         | 194/2000 [3:45:48<57:35:43, 114.81s/it]

Training_loss 0.57809,   Accuracy 0.76085


 10%|▉         | 195/2000 [3:47:40<57:02:45, 113.78s/it]

Training_loss 0.57796,   Accuracy 0.76103


 10%|▉         | 196/2000 [3:49:29<56:18:22, 112.36s/it]

Training_loss 0.57774,   Accuracy 0.76103


 10%|▉         | 197/2000 [3:51:15<55:18:31, 110.43s/it]

Training_loss 0.57757,   Accuracy 0.76100


 10%|▉         | 198/2000 [3:53:06<55:26:21, 110.76s/it]

Training_loss 0.57747,   Accuracy 0.76102


 10%|▉         | 199/2000 [3:55:03<56:18:13, 112.55s/it]

Training_loss 0.57729,   Accuracy 0.76106


 10%|█         | 200/2000 [3:56:57<56:28:18, 112.94s/it]

Training_loss 0.57711,   Accuracy 0.76100


 10%|█         | 201/2000 [3:58:49<56:17:28, 112.65s/it]

Training_loss 0.57702,   Accuracy 0.76106


 10%|█         | 202/2000 [4:00:41<56:14:26, 112.61s/it]

Training_loss 0.57683,   Accuracy 0.76105


 10%|█         | 203/2000 [4:02:26<55:00:16, 110.19s/it]

Training_loss 0.57666,   Accuracy 0.76107


 10%|█         | 204/2000 [4:04:11<54:13:16, 108.68s/it]

Training_loss 0.57646,   Accuracy 0.76107


 10%|█         | 205/2000 [4:05:59<54:07:25, 108.55s/it]

Training_loss 0.57623,   Accuracy 0.76090


 10%|█         | 206/2000 [4:07:47<54:00:11, 108.37s/it]

Training_loss 0.57611,   Accuracy 0.76110


 10%|█         | 207/2000 [4:09:34<53:46:59, 107.99s/it]

Training_loss 0.57591,   Accuracy 0.76111


 10%|█         | 208/2000 [4:11:20<53:20:48, 107.17s/it]

Training_loss 0.57576,   Accuracy 0.76113


 10%|█         | 209/2000 [4:13:03<52:47:41, 106.12s/it]

Training_loss 0.57558,   Accuracy 0.76111


 10%|█         | 210/2000 [4:14:48<52:36:51, 105.82s/it]

Training_loss 0.57543,   Accuracy 0.76118


 11%|█         | 211/2000 [4:16:34<52:37:12, 105.89s/it]

Training_loss 0.57527,   Accuracy 0.76114


 11%|█         | 212/2000 [4:18:22<52:47:37, 106.30s/it]

Training_loss 0.57513,   Accuracy 0.76118


 11%|█         | 213/2000 [4:20:07<52:38:00, 106.03s/it]

Training_loss 0.57502,   Accuracy 0.76114


 11%|█         | 214/2000 [4:22:00<53:36:58, 108.07s/it]

Training_loss 0.57487,   Accuracy 0.76113


 11%|█         | 215/2000 [4:23:46<53:18:22, 107.51s/it]

Training_loss 0.57474,   Accuracy 0.76116


 11%|█         | 216/2000 [4:25:32<52:57:43, 106.87s/it]

Training_loss 0.57459,   Accuracy 0.76118


 11%|█         | 217/2000 [4:27:19<53:00:32, 107.03s/it]

Training_loss 0.57442,   Accuracy 0.76113


 11%|█         | 218/2000 [4:29:04<52:44:51, 106.56s/it]

Training_loss 0.57424,   Accuracy 0.76113


 11%|█         | 219/2000 [4:30:57<53:39:18, 108.45s/it]

Training_loss 0.57414,   Accuracy 0.76121


 11%|█         | 220/2000 [4:32:42<53:04:39, 107.35s/it]

Training_loss 0.57397,   Accuracy 0.76121


 11%|█         | 221/2000 [4:34:38<54:16:31, 109.83s/it]

Training_loss 0.57382,   Accuracy 0.76124


 11%|█         | 222/2000 [4:36:23<53:38:55, 108.62s/it]

Training_loss 0.57374,   Accuracy 0.76131


 11%|█         | 223/2000 [4:38:19<54:36:27, 110.63s/it]

Training_loss 0.57361,   Accuracy 0.76133


 11%|█         | 224/2000 [4:40:03<53:42:12, 108.86s/it]

Training_loss 0.57349,   Accuracy 0.76132


 11%|█▏        | 225/2000 [4:41:59<54:38:57, 110.84s/it]

Training_loss 0.57333,   Accuracy 0.76131


 11%|█▏        | 226/2000 [4:44:07<57:12:25, 116.09s/it]

Training_loss 0.57325,   Accuracy 0.76137


 11%|█▏        | 227/2000 [4:46:23<60:06:05, 122.03s/it]

Training_loss 0.57317,   Accuracy 0.76143


 11%|█▏        | 228/2000 [4:48:29<60:38:52, 123.21s/it]

Training_loss 0.57303,   Accuracy 0.76136


 11%|█▏        | 229/2000 [4:50:22<59:08:24, 120.22s/it]

Training_loss 0.57290,   Accuracy 0.76134


 12%|█▏        | 230/2000 [4:52:08<56:59:31, 115.92s/it]

Training_loss 0.57275,   Accuracy 0.76137


 12%|█▏        | 231/2000 [4:53:54<55:30:48, 112.97s/it]

Training_loss 0.57265,   Accuracy 0.76139


 12%|█▏        | 232/2000 [4:55:42<54:43:11, 111.42s/it]

Training_loss 0.57243,   Accuracy 0.76136


 12%|█▏        | 233/2000 [4:57:31<54:14:17, 110.50s/it]

Training_loss 0.57229,   Accuracy 0.76136


 12%|█▏        | 234/2000 [4:59:17<53:35:46, 109.26s/it]

Training_loss 0.57220,   Accuracy 0.76138


 12%|█▏        | 235/2000 [5:01:25<56:18:13, 114.84s/it]

Training_loss 0.57215,   Accuracy 0.76161


 12%|█▏        | 236/2000 [5:03:12<55:05:27, 112.43s/it]

Training_loss 0.57202,   Accuracy 0.76161


 12%|█▏        | 237/2000 [5:04:58<54:07:24, 110.52s/it]

Training_loss 0.57189,   Accuracy 0.76162


 12%|█▏        | 238/2000 [5:06:49<54:12:32, 110.76s/it]

Training_loss 0.57180,   Accuracy 0.76161


 12%|█▏        | 239/2000 [5:08:33<53:16:15, 108.90s/it]

Training_loss 0.57165,   Accuracy 0.76161


 12%|█▏        | 240/2000 [5:10:18<52:37:46, 107.65s/it]

Training_loss 0.57152,   Accuracy 0.76147


 12%|█▏        | 241/2000 [5:12:04<52:18:09, 107.04s/it]

Training_loss 0.57137,   Accuracy 0.76144


 12%|█▏        | 242/2000 [5:13:49<51:58:54, 106.45s/it]

Training_loss 0.57124,   Accuracy 0.76156


 12%|█▏        | 243/2000 [5:15:34<51:47:01, 106.10s/it]

Training_loss 0.57111,   Accuracy 0.76155


 12%|█▏        | 244/2000 [5:17:22<52:01:08, 106.65s/it]

Training_loss 0.57097,   Accuracy 0.76159


 12%|█▏        | 245/2000 [5:19:09<51:57:49, 106.59s/it]

Training_loss 0.57083,   Accuracy 0.76148


 12%|█▏        | 246/2000 [5:20:55<51:52:46, 106.48s/it]

Training_loss 0.57070,   Accuracy 0.76147


 12%|█▏        | 247/2000 [5:22:48<52:52:44, 108.59s/it]

Training_loss 0.57057,   Accuracy 0.76145


 12%|█▏        | 248/2000 [5:24:35<52:32:04, 107.95s/it]

Training_loss 0.57046,   Accuracy 0.76148


 12%|█▏        | 249/2000 [5:26:22<52:21:59, 107.66s/it]

Training_loss 0.57037,   Accuracy 0.76148


 12%|█▎        | 250/2000 [5:28:10<52:25:26, 107.84s/it]

Training_loss 0.57023,   Accuracy 0.76149


 13%|█▎        | 251/2000 [5:30:00<52:41:25, 108.45s/it]

Training_loss 0.57010,   Accuracy 0.76152


 13%|█▎        | 252/2000 [5:31:51<53:00:50, 109.18s/it]

Training_loss 0.57002,   Accuracy 0.76154


 13%|█▎        | 253/2000 [5:33:36<52:26:12, 108.06s/it]

Training_loss 0.56993,   Accuracy 0.76161


 13%|█▎        | 254/2000 [5:35:27<52:50:48, 108.96s/it]

Training_loss 0.56980,   Accuracy 0.76172


 13%|█▎        | 255/2000 [5:37:18<53:07:09, 109.59s/it]

Training_loss 0.56968,   Accuracy 0.76172


 13%|█▎        | 256/2000 [5:39:05<52:38:07, 108.65s/it]

Training_loss 0.56958,   Accuracy 0.76180


 13%|█▎        | 257/2000 [5:40:49<52:00:53, 107.43s/it]

Training_loss 0.56946,   Accuracy 0.76176


 13%|█▎        | 258/2000 [5:42:48<53:38:19, 110.85s/it]

Training_loss 0.56938,   Accuracy 0.76191


 13%|█▎        | 259/2000 [5:44:47<54:47:02, 113.28s/it]

Training_loss 0.56927,   Accuracy 0.76193


 13%|█▎        | 260/2000 [5:46:44<55:20:19, 114.49s/it]

Training_loss 0.56918,   Accuracy 0.76190


 13%|█▎        | 261/2000 [5:48:31<54:07:43, 112.05s/it]

Training_loss 0.56908,   Accuracy 0.76195


 13%|█▎        | 262/2000 [5:50:16<53:05:42, 109.98s/it]

Training_loss 0.56897,   Accuracy 0.76195


 13%|█▎        | 263/2000 [5:52:07<53:14:43, 110.35s/it]

Training_loss 0.56886,   Accuracy 0.76191


 13%|█▎        | 264/2000 [5:54:05<54:13:33, 112.45s/it]

Training_loss 0.56874,   Accuracy 0.76215


 13%|█▎        | 265/2000 [5:56:04<55:12:50, 114.57s/it]

Training_loss 0.56860,   Accuracy 0.76213


 13%|█▎        | 266/2000 [5:58:04<55:53:32, 116.04s/it]

Training_loss 0.56847,   Accuracy 0.76209


 13%|█▎        | 267/2000 [5:59:59<55:47:09, 115.89s/it]

Training_loss 0.56837,   Accuracy 0.76208


 13%|█▎        | 268/2000 [6:01:58<56:09:32, 116.73s/it]

Training_loss 0.56830,   Accuracy 0.76210


 13%|█▎        | 269/2000 [6:03:45<54:47:08, 113.94s/it]

Training_loss 0.56821,   Accuracy 0.76225


 14%|█▎        | 270/2000 [6:05:30<53:26:38, 111.21s/it]

Training_loss 0.56813,   Accuracy 0.76213


 14%|█▎        | 271/2000 [6:07:17<52:46:58, 109.90s/it]

Training_loss 0.56806,   Accuracy 0.76217


 14%|█▎        | 272/2000 [6:09:04<52:19:59, 109.03s/it]

Training_loss 0.56794,   Accuracy 0.76215


 14%|█▎        | 273/2000 [6:10:53<52:20:58, 109.12s/it]

Training_loss 0.56784,   Accuracy 0.76220


 14%|█▎        | 274/2000 [6:12:38<51:40:44, 107.79s/it]

Training_loss 0.56772,   Accuracy 0.76219


 14%|█▍        | 275/2000 [6:14:26<51:43:55, 107.96s/it]

Training_loss 0.56760,   Accuracy 0.76210


 14%|█▍        | 276/2000 [6:16:13<51:31:35, 107.60s/it]

Training_loss 0.56747,   Accuracy 0.76220


 14%|█▍        | 277/2000 [6:17:59<51:15:38, 107.10s/it]

Training_loss 0.56735,   Accuracy 0.76218


 14%|█▍        | 278/2000 [6:19:44<50:57:02, 106.52s/it]

Training_loss 0.56725,   Accuracy 0.76221


 14%|█▍        | 279/2000 [6:21:34<51:22:56, 107.48s/it]

Training_loss 0.56714,   Accuracy 0.76217


 14%|█▍        | 280/2000 [6:23:20<51:06:38, 106.98s/it]

Training_loss 0.56711,   Accuracy 0.76225


 14%|█▍        | 281/2000 [6:25:04<50:42:19, 106.19s/it]

Training_loss 0.56702,   Accuracy 0.76241


 14%|█▍        | 282/2000 [6:26:53<51:03:14, 106.98s/it]

Training_loss 0.56692,   Accuracy 0.76234


 14%|█▍        | 283/2000 [6:28:40<51:03:45, 107.06s/it]

Training_loss 0.56684,   Accuracy 0.76230


 14%|█▍        | 284/2000 [6:30:28<51:10:29, 107.36s/it]

Training_loss 0.56676,   Accuracy 0.76232


 14%|█▍        | 285/2000 [6:32:30<53:15:08, 111.78s/it]

Training_loss 0.56669,   Accuracy 0.76235


 14%|█▍        | 286/2000 [6:34:25<53:38:54, 112.68s/it]

Training_loss 0.56655,   Accuracy 0.76241


 14%|█▍        | 287/2000 [6:36:15<53:12:03, 111.81s/it]

Training_loss 0.56646,   Accuracy 0.76242


 14%|█▍        | 288/2000 [6:38:04<52:50:56, 111.13s/it]

Training_loss 0.56638,   Accuracy 0.76245


 14%|█▍        | 289/2000 [6:39:49<51:49:57, 109.06s/it]

Training_loss 0.56629,   Accuracy 0.76253


 14%|█▍        | 290/2000 [6:41:40<52:10:10, 109.83s/it]

Training_loss 0.56619,   Accuracy 0.76257


 15%|█▍        | 291/2000 [6:43:39<53:26:20, 112.57s/it]

Training_loss 0.56611,   Accuracy 0.76257


 15%|█▍        | 292/2000 [6:45:37<54:07:48, 114.09s/it]

Training_loss 0.56598,   Accuracy 0.76255


 15%|█▍        | 293/2000 [6:47:34<54:28:38, 114.89s/it]

Training_loss 0.56586,   Accuracy 0.76254


 15%|█▍        | 294/2000 [6:49:30<54:42:05, 115.43s/it]

Training_loss 0.56576,   Accuracy 0.76261


 15%|█▍        | 295/2000 [6:51:27<54:52:41, 115.87s/it]

Training_loss 0.56567,   Accuracy 0.76270


 15%|█▍        | 296/2000 [6:53:23<54:49:22, 115.82s/it]

Training_loss 0.56557,   Accuracy 0.76253


 15%|█▍        | 297/2000 [6:55:32<56:39:47, 119.78s/it]

Training_loss 0.56550,   Accuracy 0.76265


 15%|█▍        | 298/2000 [6:57:43<58:16:18, 123.25s/it]

Training_loss 0.56542,   Accuracy 0.76267


 15%|█▍        | 299/2000 [6:59:32<56:10:18, 118.88s/it]

Training_loss 0.56534,   Accuracy 0.76267


 15%|█▌        | 300/2000 [7:01:29<55:51:41, 118.29s/it]

Training_loss 0.56523,   Accuracy 0.76272


 15%|█▌        | 301/2000 [7:03:15<54:05:26, 114.61s/it]

Training_loss 0.56516,   Accuracy 0.76278


 15%|█▌        | 302/2000 [7:05:03<53:06:02, 112.58s/it]

Training_loss 0.56506,   Accuracy 0.76275


 15%|█▌        | 303/2000 [7:06:48<51:59:27, 110.29s/it]

Training_loss 0.56496,   Accuracy 0.76279


 15%|█▌        | 304/2000 [7:08:33<51:12:13, 108.69s/it]

Training_loss 0.56484,   Accuracy 0.76271


 15%|█▌        | 305/2000 [7:10:29<52:11:50, 110.86s/it]

Training_loss 0.56476,   Accuracy 0.76271


 15%|█▌        | 306/2000 [7:12:23<52:40:59, 111.96s/it]

Training_loss 0.56469,   Accuracy 0.76282


 15%|█▌        | 307/2000 [7:14:27<54:22:40, 115.63s/it]

Training_loss 0.56460,   Accuracy 0.76279


 15%|█▌        | 308/2000 [7:16:27<54:54:12, 116.82s/it]

Training_loss 0.56454,   Accuracy 0.76280


 15%|█▌        | 309/2000 [7:18:31<55:53:24, 118.99s/it]

Training_loss 0.56446,   Accuracy 0.76289


 16%|█▌        | 310/2000 [7:20:32<56:08:10, 119.58s/it]

Training_loss 0.56441,   Accuracy 0.76296


 16%|█▌        | 311/2000 [7:22:25<55:12:02, 117.66s/it]

Training_loss 0.56431,   Accuracy 0.76296


 16%|█▌        | 312/2000 [7:24:16<54:09:51, 115.52s/it]

Training_loss 0.56422,   Accuracy 0.76288


 16%|█▌        | 313/2000 [7:26:07<53:32:42, 114.26s/it]

Training_loss 0.56415,   Accuracy 0.76305


 16%|█▌        | 314/2000 [7:27:58<53:08:31, 113.47s/it]

Training_loss 0.56407,   Accuracy 0.76311


 16%|█▌        | 315/2000 [7:29:44<51:59:49, 111.09s/it]

Training_loss 0.56402,   Accuracy 0.76316


 16%|█▌        | 316/2000 [7:31:31<51:24:05, 109.88s/it]

Training_loss 0.56396,   Accuracy 0.76316


 16%|█▌        | 317/2000 [7:33:15<50:33:19, 108.14s/it]

Training_loss 0.56387,   Accuracy 0.76313


 16%|█▌        | 318/2000 [7:35:01<50:11:43, 107.43s/it]

Training_loss 0.56378,   Accuracy 0.76313


 16%|█▌        | 319/2000 [7:36:48<50:03:40, 107.21s/it]

Training_loss 0.56369,   Accuracy 0.76297


 16%|█▌        | 320/2000 [7:38:34<49:57:08, 107.04s/it]

Training_loss 0.56364,   Accuracy 0.76321


 16%|█▌        | 321/2000 [7:40:20<49:40:08, 106.50s/it]

Training_loss 0.56353,   Accuracy 0.76321


 16%|█▌        | 322/2000 [7:42:16<51:00:09, 109.42s/it]

Training_loss 0.56343,   Accuracy 0.76313


 16%|█▌        | 323/2000 [7:44:17<52:38:33, 113.01s/it]

Training_loss 0.56334,   Accuracy 0.76303


 16%|█▌        | 324/2000 [7:46:19<53:49:55, 115.63s/it]

Training_loss 0.56325,   Accuracy 0.76305


 16%|█▋        | 325/2000 [7:48:11<53:20:40, 114.65s/it]

Training_loss 0.56319,   Accuracy 0.76315


 16%|█▋        | 326/2000 [7:49:58<52:08:58, 112.15s/it]

Training_loss 0.56311,   Accuracy 0.76327


 16%|█▋        | 327/2000 [7:51:47<51:41:15, 111.22s/it]

Training_loss 0.56300,   Accuracy 0.76312


 16%|█▋        | 328/2000 [7:53:31<50:42:08, 109.17s/it]

Training_loss 0.56292,   Accuracy 0.76313


 16%|█▋        | 329/2000 [7:55:20<50:41:37, 109.21s/it]

Training_loss 0.56287,   Accuracy 0.76316


 16%|█▋        | 330/2000 [7:57:07<50:19:41, 108.49s/it]

Training_loss 0.56280,   Accuracy 0.76321


 17%|█▋        | 331/2000 [7:58:53<49:52:18, 107.57s/it]

Training_loss 0.56275,   Accuracy 0.76338


 17%|█▋        | 332/2000 [8:00:37<49:23:28, 106.60s/it]

Training_loss 0.56267,   Accuracy 0.76340


 17%|█▋        | 333/2000 [8:02:22<49:09:37, 106.17s/it]

Training_loss 0.56260,   Accuracy 0.76343


 17%|█▋        | 334/2000 [8:04:10<49:19:08, 106.57s/it]

Training_loss 0.56251,   Accuracy 0.76345


 17%|█▋        | 335/2000 [8:05:55<49:09:56, 106.30s/it]

Training_loss 0.56243,   Accuracy 0.76346


 17%|█▋        | 336/2000 [8:07:45<49:36:54, 107.34s/it]

Training_loss 0.56234,   Accuracy 0.76351


 17%|█▋        | 337/2000 [8:09:30<49:16:55, 106.68s/it]

Training_loss 0.56226,   Accuracy 0.76355


 17%|█▋        | 338/2000 [8:11:15<48:59:06, 106.11s/it]

Training_loss 0.56219,   Accuracy 0.76350


 17%|█▋        | 339/2000 [8:12:59<48:44:57, 105.66s/it]

Training_loss 0.56210,   Accuracy 0.76359


 17%|█▋        | 340/2000 [8:14:45<48:38:11, 105.48s/it]

Training_loss 0.56203,   Accuracy 0.76352


 17%|█▋        | 341/2000 [8:16:31<48:46:50, 105.85s/it]

Training_loss 0.56193,   Accuracy 0.76350


 17%|█▋        | 342/2000 [8:18:22<49:28:37, 107.43s/it]

Training_loss 0.56189,   Accuracy 0.76358


 17%|█▋        | 343/2000 [8:20:13<49:55:21, 108.46s/it]

Training_loss 0.56185,   Accuracy 0.76360


 17%|█▋        | 344/2000 [8:22:09<50:55:03, 110.69s/it]

Training_loss 0.56178,   Accuracy 0.76369


 17%|█▋        | 345/2000 [8:24:01<51:05:42, 111.14s/it]

Training_loss 0.56168,   Accuracy 0.76358


 17%|█▋        | 346/2000 [8:25:54<51:19:59, 111.73s/it]

Training_loss 0.56160,   Accuracy 0.76359


 17%|█▋        | 347/2000 [8:27:54<52:19:00, 113.94s/it]

Training_loss 0.56153,   Accuracy 0.76357


 17%|█▋        | 348/2000 [8:29:48<52:18:52, 114.00s/it]

Training_loss 0.56148,   Accuracy 0.76369


 17%|█▋        | 349/2000 [8:31:47<52:58:53, 115.53s/it]

Training_loss 0.56142,   Accuracy 0.76377


 18%|█▊        | 350/2000 [8:33:59<55:18:10, 120.66s/it]

Training_loss 0.56139,   Accuracy 0.76376


 18%|█▊        | 351/2000 [8:36:27<58:57:36, 128.72s/it]

Training_loss 0.56134,   Accuracy 0.76376


 18%|█▊        | 352/2000 [8:38:44<60:05:59, 131.29s/it]

Training_loss 0.56126,   Accuracy 0.76382


 18%|█▊        | 353/2000 [8:40:53<59:40:18, 130.43s/it]

Training_loss 0.56120,   Accuracy 0.76394


 18%|█▊        | 354/2000 [8:43:16<61:24:00, 134.29s/it]

Training_loss 0.56112,   Accuracy 0.76395


 18%|█▊        | 355/2000 [8:45:32<61:36:58, 134.84s/it]

Training_loss 0.56104,   Accuracy 0.76397


 18%|█▊        | 356/2000 [8:47:41<60:44:45, 133.02s/it]

Training_loss 0.56096,   Accuracy 0.76396


 18%|█▊        | 357/2000 [8:49:37<58:26:35, 128.06s/it]

Training_loss 0.56091,   Accuracy 0.76392


 18%|█▊        | 358/2000 [8:51:40<57:42:42, 126.53s/it]

Training_loss 0.56083,   Accuracy 0.76396


 18%|█▊        | 359/2000 [8:53:57<59:05:36, 129.64s/it]

Training_loss 0.56077,   Accuracy 0.76395


 18%|█▊        | 360/2000 [8:55:53<57:11:34, 125.55s/it]

Training_loss 0.56069,   Accuracy 0.76400


 18%|█▊        | 361/2000 [8:57:55<56:36:23, 124.33s/it]

Training_loss 0.56061,   Accuracy 0.76400


 18%|█▊        | 362/2000 [8:59:40<53:56:42, 118.56s/it]

Training_loss 0.56053,   Accuracy 0.76399


 18%|█▊        | 363/2000 [9:01:29<52:36:46, 115.70s/it]

Training_loss 0.56046,   Accuracy 0.76403


 18%|█▊        | 364/2000 [9:03:20<51:56:52, 114.31s/it]

Training_loss 0.56037,   Accuracy 0.76401


 18%|█▊        | 365/2000 [9:05:05<50:39:12, 111.53s/it]

Training_loss 0.56027,   Accuracy 0.76398


 18%|█▊        | 366/2000 [9:06:54<50:14:41, 110.70s/it]

Training_loss 0.56021,   Accuracy 0.76403


 18%|█▊        | 367/2000 [9:08:39<49:29:34, 109.11s/it]

Training_loss 0.56015,   Accuracy 0.76411


 18%|█▊        | 368/2000 [9:10:30<49:38:50, 109.52s/it]

Training_loss 0.56009,   Accuracy 0.76410


 18%|█▊        | 369/2000 [9:12:13<48:45:41, 107.63s/it]

Training_loss 0.56001,   Accuracy 0.76405


 18%|█▊        | 370/2000 [9:14:00<48:43:08, 107.60s/it]

Training_loss 0.55995,   Accuracy 0.76419


 19%|█▊        | 371/2000 [9:15:47<48:30:36, 107.20s/it]

Training_loss 0.55988,   Accuracy 0.76412


 19%|█▊        | 372/2000 [9:17:34<48:29:27, 107.23s/it]

Training_loss 0.55982,   Accuracy 0.76420


 19%|█▊        | 373/2000 [9:19:20<48:21:34, 107.00s/it]

Training_loss 0.55974,   Accuracy 0.76420


 19%|█▊        | 374/2000 [9:21:05<48:00:36, 106.30s/it]

Training_loss 0.55972,   Accuracy 0.76422


 19%|█▉        | 375/2000 [9:22:50<47:46:24, 105.84s/it]

Training_loss 0.55963,   Accuracy 0.76419


 19%|█▉        | 376/2000 [9:24:34<47:34:56, 105.48s/it]

Training_loss 0.55954,   Accuracy 0.76414


 19%|█▉        | 377/2000 [9:26:28<48:37:17, 107.85s/it]

Training_loss 0.55947,   Accuracy 0.76420


 19%|█▉        | 378/2000 [9:28:17<48:42:57, 108.12s/it]

Training_loss 0.55940,   Accuracy 0.76425


 19%|█▉        | 379/2000 [9:30:04<48:37:33, 107.99s/it]

Training_loss 0.55936,   Accuracy 0.76429


 19%|█▉        | 380/2000 [9:31:51<48:28:07, 107.71s/it]

Training_loss 0.55933,   Accuracy 0.76428


 19%|█▉        | 381/2000 [9:33:41<48:44:12, 108.37s/it]

Training_loss 0.55927,   Accuracy 0.76433


 19%|█▉        | 382/2000 [9:35:42<50:19:47, 111.98s/it]

Training_loss 0.55923,   Accuracy 0.76433


 19%|█▉        | 383/2000 [9:37:43<51:36:18, 114.89s/it]

Training_loss 0.55919,   Accuracy 0.76436


 19%|█▉        | 384/2000 [9:39:29<50:20:30, 112.15s/it]

Training_loss 0.55915,   Accuracy 0.76436


 19%|█▉        | 385/2000 [9:41:18<49:49:30, 111.07s/it]

Training_loss 0.55907,   Accuracy 0.76441


 19%|█▉        | 386/2000 [9:43:15<50:37:19, 112.91s/it]

Training_loss 0.55903,   Accuracy 0.76462


 19%|█▉        | 387/2000 [9:45:15<51:32:33, 115.04s/it]

Training_loss 0.55896,   Accuracy 0.76467


 19%|█▉        | 388/2000 [9:47:09<51:25:43, 114.85s/it]

Training_loss 0.55889,   Accuracy 0.76466


 19%|█▉        | 389/2000 [9:49:03<51:17:30, 114.62s/it]

Training_loss 0.55887,   Accuracy 0.76459


 20%|█▉        | 390/2000 [9:50:48<49:52:19, 111.52s/it]

Training_loss 0.55881,   Accuracy 0.76440


 20%|█▉        | 391/2000 [9:52:38<49:37:57, 111.05s/it]

Training_loss 0.55875,   Accuracy 0.76439


 20%|█▉        | 392/2000 [9:54:31<49:56:30, 111.81s/it]

Training_loss 0.55871,   Accuracy 0.76466


 20%|█▉        | 393/2000 [9:56:27<50:27:12, 113.03s/it]

Training_loss 0.55861,   Accuracy 0.76444


 20%|█▉        | 394/2000 [9:58:18<50:10:16, 112.46s/it]

Training_loss 0.55854,   Accuracy 0.76451


 20%|█▉        | 395/2000 [10:00:09<49:59:08, 112.12s/it]

Training_loss 0.55849,   Accuracy 0.76450


 20%|█▉        | 396/2000 [10:02:06<50:32:51, 113.45s/it]

Training_loss 0.55845,   Accuracy 0.76447


 20%|█▉        | 397/2000 [10:04:05<51:13:35, 115.04s/it]

Training_loss 0.55838,   Accuracy 0.76454


 20%|█▉        | 398/2000 [10:05:51<50:05:06, 112.55s/it]

Training_loss 0.55829,   Accuracy 0.76458


 20%|█▉        | 399/2000 [10:07:39<49:22:13, 111.01s/it]

Training_loss 0.55824,   Accuracy 0.76459


 20%|██        | 400/2000 [10:09:24<48:33:31, 109.26s/it]

Training_loss 0.55819,   Accuracy 0.76469


 20%|██        | 401/2000 [10:11:11<48:12:44, 108.55s/it]

Training_loss 0.55813,   Accuracy 0.76469


 20%|██        | 402/2000 [10:12:55<47:38:27, 107.33s/it]

Training_loss 0.55806,   Accuracy 0.76464


 20%|██        | 403/2000 [10:14:49<48:29:37, 109.32s/it]

Training_loss 0.55798,   Accuracy 0.76465


 20%|██        | 404/2000 [10:16:36<48:02:28, 108.36s/it]

Training_loss 0.55793,   Accuracy 0.76467


 20%|██        | 405/2000 [10:18:24<47:58:54, 108.30s/it]

Training_loss 0.55787,   Accuracy 0.76469


 20%|██        | 406/2000 [10:20:11<47:52:18, 108.12s/it]

Training_loss 0.55782,   Accuracy 0.76470


 20%|██        | 407/2000 [10:21:57<47:32:56, 107.46s/it]

Training_loss 0.55776,   Accuracy 0.76471


 20%|██        | 408/2000 [10:23:44<47:22:31, 107.13s/it]

Training_loss 0.55771,   Accuracy 0.76472


 20%|██        | 409/2000 [10:25:31<47:20:46, 107.13s/it]

Training_loss 0.55765,   Accuracy 0.76493


 20%|██        | 410/2000 [10:27:17<47:08:40, 106.74s/it]

Training_loss 0.55759,   Accuracy 0.76484


 21%|██        | 411/2000 [10:29:12<48:15:29, 109.33s/it]

Training_loss 0.55752,   Accuracy 0.76481


 21%|██        | 412/2000 [10:31:03<48:27:14, 109.85s/it]

Training_loss 0.55747,   Accuracy 0.76492


 21%|██        | 413/2000 [10:32:48<47:44:09, 108.29s/it]

Training_loss 0.55741,   Accuracy 0.76485


 21%|██        | 414/2000 [10:34:36<47:40:43, 108.22s/it]

Training_loss 0.55738,   Accuracy 0.76497


 21%|██        | 415/2000 [10:36:20<47:10:36, 107.15s/it]

Training_loss 0.55731,   Accuracy 0.76497


 21%|██        | 416/2000 [10:38:09<47:20:09, 107.58s/it]

Training_loss 0.55725,   Accuracy 0.76498


 21%|██        | 417/2000 [10:39:56<47:12:00, 107.34s/it]

Training_loss 0.55717,   Accuracy 0.76499


 21%|██        | 418/2000 [10:41:57<48:59:03, 111.47s/it]

Training_loss 0.55710,   Accuracy 0.76510


 21%|██        | 419/2000 [10:43:53<49:32:25, 112.81s/it]

Training_loss 0.55705,   Accuracy 0.76529


 21%|██        | 420/2000 [10:46:06<52:14:50, 119.04s/it]

Training_loss 0.55700,   Accuracy 0.76508


 21%|██        | 421/2000 [10:48:17<53:41:31, 122.41s/it]

Training_loss 0.55695,   Accuracy 0.76530


 21%|██        | 422/2000 [10:50:16<53:12:24, 121.38s/it]

Training_loss 0.55688,   Accuracy 0.76522


 21%|██        | 423/2000 [10:52:19<53:28:51, 122.09s/it]

Training_loss 0.55683,   Accuracy 0.76541


 21%|██        | 424/2000 [10:54:28<54:16:24, 123.97s/it]

Training_loss 0.55674,   Accuracy 0.76540


 21%|██▏       | 425/2000 [10:56:14<51:50:48, 118.51s/it]

Training_loss 0.55669,   Accuracy 0.76538


 21%|██▏       | 426/2000 [10:58:00<50:10:25, 114.76s/it]

Training_loss 0.55661,   Accuracy 0.76534


 21%|██▏       | 427/2000 [10:59:46<49:01:03, 112.18s/it]

Training_loss 0.55654,   Accuracy 0.76533


 21%|██▏       | 428/2000 [11:01:31<48:06:54, 110.19s/it]

Training_loss 0.55647,   Accuracy 0.76535


 21%|██▏       | 429/2000 [11:03:16<47:21:21, 108.52s/it]

Training_loss 0.55644,   Accuracy 0.76544


 22%|██▏       | 430/2000 [11:05:03<47:09:34, 108.14s/it]

Training_loss 0.55643,   Accuracy 0.76547


 22%|██▏       | 431/2000 [11:06:53<47:20:10, 108.61s/it]

Training_loss 0.55636,   Accuracy 0.76550


 22%|██▏       | 432/2000 [11:08:40<47:06:51, 108.17s/it]

Training_loss 0.55632,   Accuracy 0.76558


 22%|██▏       | 433/2000 [11:10:25<46:38:22, 107.15s/it]

Training_loss 0.55627,   Accuracy 0.76564


 22%|██▏       | 434/2000 [11:12:09<46:14:13, 106.29s/it]

Training_loss 0.55619,   Accuracy 0.76567


 22%|██▏       | 435/2000 [11:13:53<45:54:18, 105.60s/it]

Training_loss 0.55614,   Accuracy 0.76567


 22%|██▏       | 436/2000 [11:15:38<45:48:50, 105.45s/it]

Training_loss 0.55607,   Accuracy 0.76573


 22%|██▏       | 437/2000 [11:17:31<46:42:47, 107.59s/it]

Training_loss 0.55599,   Accuracy 0.76564


 22%|██▏       | 438/2000 [11:19:22<47:09:26, 108.69s/it]

Training_loss 0.55594,   Accuracy 0.76567


 22%|██▏       | 439/2000 [11:21:06<46:34:15, 107.40s/it]

Training_loss 0.55589,   Accuracy 0.76564


 22%|██▏       | 440/2000 [11:22:52<46:20:11, 106.93s/it]

Training_loss 0.55586,   Accuracy 0.76566


 22%|██▏       | 441/2000 [11:24:37<46:03:37, 106.36s/it]

Training_loss 0.55581,   Accuracy 0.76566


 22%|██▏       | 442/2000 [11:26:24<46:05:46, 106.51s/it]

Training_loss 0.55579,   Accuracy 0.76589


 22%|██▏       | 443/2000 [11:28:12<46:16:52, 107.01s/it]

Training_loss 0.55576,   Accuracy 0.76596


 22%|██▏       | 444/2000 [11:30:02<46:39:26, 107.95s/it]

Training_loss 0.55571,   Accuracy 0.76606


 22%|██▏       | 445/2000 [11:31:53<47:00:49, 108.84s/it]

Training_loss 0.55566,   Accuracy 0.76606


 22%|██▏       | 446/2000 [11:33:38<46:24:38, 107.51s/it]

Training_loss 0.55565,   Accuracy 0.76615


 22%|██▏       | 447/2000 [11:35:24<46:15:22, 107.23s/it]

Training_loss 0.55563,   Accuracy 0.76631


 22%|██▏       | 448/2000 [11:37:12<46:13:47, 107.23s/it]

Training_loss 0.55559,   Accuracy 0.76635


 22%|██▏       | 449/2000 [11:38:57<45:59:09, 106.74s/it]

Training_loss 0.55553,   Accuracy 0.76634


 22%|██▎       | 450/2000 [11:40:45<46:07:13, 107.12s/it]

Training_loss 0.55547,   Accuracy 0.76636


 23%|██▎       | 451/2000 [11:42:30<45:47:25, 106.42s/it]

Training_loss 0.55542,   Accuracy 0.76636


 23%|██▎       | 452/2000 [11:44:14<45:31:00, 105.85s/it]

Training_loss 0.55539,   Accuracy 0.76631


 23%|██▎       | 453/2000 [11:45:58<45:15:02, 105.30s/it]

Training_loss 0.55535,   Accuracy 0.76640


 23%|██▎       | 454/2000 [11:47:49<45:50:16, 106.74s/it]

Training_loss 0.55531,   Accuracy 0.76650


 23%|██▎       | 455/2000 [11:49:37<46:05:05, 107.38s/it]

Training_loss 0.55527,   Accuracy 0.76653


 23%|██▎       | 456/2000 [11:51:22<45:44:48, 106.66s/it]

Training_loss 0.55524,   Accuracy 0.76661


 23%|██▎       | 457/2000 [11:53:12<46:05:22, 107.53s/it]

Training_loss 0.55516,   Accuracy 0.76659


 23%|██▎       | 458/2000 [11:55:00<46:08:32, 107.73s/it]

Training_loss 0.55513,   Accuracy 0.76665


 23%|██▎       | 459/2000 [11:56:46<45:48:44, 107.02s/it]

Training_loss 0.55509,   Accuracy 0.76671


 23%|██▎       | 460/2000 [11:58:32<45:40:15, 106.76s/it]

Training_loss 0.55505,   Accuracy 0.76681


 23%|██▎       | 461/2000 [12:00:23<46:13:35, 108.13s/it]

Training_loss 0.55500,   Accuracy 0.76688


 23%|██▎       | 462/2000 [12:02:09<45:53:05, 107.40s/it]

Training_loss 0.55496,   Accuracy 0.76689


 23%|██▎       | 463/2000 [12:03:57<45:56:32, 107.61s/it]

Training_loss 0.55489,   Accuracy 0.76675


 23%|██▎       | 464/2000 [12:05:43<45:45:42, 107.25s/it]

Training_loss 0.55486,   Accuracy 0.76685


 23%|██▎       | 465/2000 [12:07:36<46:26:09, 108.90s/it]

Training_loss 0.55483,   Accuracy 0.76681


 23%|██▎       | 466/2000 [12:09:22<46:05:20, 108.16s/it]

Training_loss 0.55475,   Accuracy 0.76681


 23%|██▎       | 467/2000 [12:11:07<45:37:21, 107.14s/it]

Training_loss 0.55471,   Accuracy 0.76689


 23%|██▎       | 468/2000 [12:12:52<45:16:31, 106.39s/it]

Training_loss 0.55466,   Accuracy 0.76689


 23%|██▎       | 469/2000 [12:14:37<45:02:39, 105.92s/it]

Training_loss 0.55462,   Accuracy 0.76699


 24%|██▎       | 470/2000 [12:16:21<44:46:11, 105.34s/it]

Training_loss 0.55456,   Accuracy 0.76691


 24%|██▎       | 471/2000 [12:18:10<45:13:38, 106.49s/it]

Training_loss 0.55452,   Accuracy 0.76696


 24%|██▎       | 472/2000 [12:19:56<45:12:01, 106.49s/it]

Training_loss 0.55449,   Accuracy 0.76702


 24%|██▎       | 473/2000 [12:21:43<45:14:43, 106.67s/it]

Training_loss 0.55445,   Accuracy 0.76702


 24%|██▎       | 474/2000 [12:23:29<45:01:49, 106.23s/it]

Training_loss 0.55440,   Accuracy 0.76707


 24%|██▍       | 475/2000 [12:25:15<45:01:46, 106.30s/it]

Training_loss 0.55434,   Accuracy 0.76706


 24%|██▍       | 476/2000 [12:27:02<45:02:02, 106.38s/it]

Training_loss 0.55432,   Accuracy 0.76710


 24%|██▍       | 477/2000 [12:28:48<45:02:06, 106.45s/it]

Training_loss 0.55426,   Accuracy 0.76710


 24%|██▍       | 478/2000 [12:30:38<45:22:49, 107.34s/it]

Training_loss 0.55422,   Accuracy 0.76705


 24%|██▍       | 479/2000 [12:32:23<45:02:37, 106.61s/it]

Training_loss 0.55418,   Accuracy 0.76709


 24%|██▍       | 480/2000 [12:34:08<44:49:07, 106.15s/it]

Training_loss 0.55416,   Accuracy 0.76713


 24%|██▍       | 481/2000 [12:35:54<44:52:46, 106.36s/it]

Training_loss 0.55412,   Accuracy 0.76712


 24%|██▍       | 482/2000 [12:37:53<46:21:44, 109.95s/it]

Training_loss 0.55407,   Accuracy 0.76715


 24%|██▍       | 483/2000 [12:39:40<45:59:01, 109.12s/it]

Training_loss 0.55402,   Accuracy 0.76723


 24%|██▍       | 484/2000 [12:41:31<46:14:38, 109.81s/it]

Training_loss 0.55397,   Accuracy 0.76715


 24%|██▍       | 485/2000 [12:43:30<47:20:29, 112.49s/it]

Training_loss 0.55398,   Accuracy 0.76724


 24%|██▍       | 486/2000 [12:45:35<48:49:29, 116.10s/it]

Training_loss 0.55394,   Accuracy 0.76721


 24%|██▍       | 487/2000 [12:47:29<48:37:21, 115.69s/it]

Training_loss 0.55390,   Accuracy 0.76722


 24%|██▍       | 488/2000 [12:49:24<48:24:32, 115.26s/it]

Training_loss 0.55385,   Accuracy 0.76723


 24%|██▍       | 489/2000 [12:51:13<47:34:19, 113.34s/it]

Training_loss 0.55377,   Accuracy 0.76728


 24%|██▍       | 490/2000 [12:53:37<51:26:51, 122.66s/it]

Training_loss 0.55371,   Accuracy 0.76743


 25%|██▍       | 491/2000 [12:55:42<51:45:30, 123.48s/it]

Training_loss 0.55369,   Accuracy 0.76753


 25%|██▍       | 492/2000 [12:57:36<50:32:58, 120.68s/it]

Training_loss 0.55366,   Accuracy 0.76762


 25%|██▍       | 493/2000 [12:59:21<48:29:54, 115.86s/it]

Training_loss 0.55365,   Accuracy 0.76763


 25%|██▍       | 494/2000 [13:01:20<48:52:37, 116.84s/it]

Training_loss 0.55361,   Accuracy 0.76778


 25%|██▍       | 495/2000 [13:03:27<50:04:29, 119.78s/it]

Training_loss 0.55360,   Accuracy 0.76794


 25%|██▍       | 496/2000 [13:05:31<50:34:34, 121.06s/it]

Training_loss 0.55357,   Accuracy 0.76808


 25%|██▍       | 497/2000 [13:07:40<51:31:52, 123.43s/it]

Training_loss 0.55351,   Accuracy 0.76808


 25%|██▍       | 498/2000 [13:09:46<51:46:48, 124.11s/it]

Training_loss 0.55349,   Accuracy 0.76809


 25%|██▍       | 499/2000 [13:11:53<52:12:01, 125.20s/it]

Training_loss 0.55342,   Accuracy 0.76809


 25%|██▌       | 500/2000 [13:13:45<50:28:01, 121.12s/it]

Training_loss 0.55338,   Accuracy 0.76818


 25%|██▌       | 501/2000 [13:15:37<49:19:23, 118.45s/it]

Training_loss 0.55333,   Accuracy 0.76822


 25%|██▌       | 502/2000 [13:17:41<50:00:27, 120.18s/it]

Training_loss 0.55327,   Accuracy 0.76826


 25%|██▌       | 503/2000 [13:19:33<48:57:35, 117.74s/it]

Training_loss 0.55325,   Accuracy 0.76847


 25%|██▌       | 504/2000 [13:21:30<48:45:29, 117.33s/it]

Training_loss 0.55323,   Accuracy 0.76836


 25%|██▌       | 505/2000 [13:23:27<48:39:27, 117.17s/it]

Training_loss 0.55318,   Accuracy 0.76846


 25%|██▌       | 506/2000 [13:25:22<48:24:47, 116.66s/it]

Training_loss 0.55315,   Accuracy 0.76839


 25%|██▌       | 507/2000 [13:27:09<47:12:53, 113.85s/it]

Training_loss 0.55312,   Accuracy 0.76839


 25%|██▌       | 508/2000 [13:28:56<46:16:52, 111.67s/it]

Training_loss 0.55307,   Accuracy 0.76845


 25%|██▌       | 509/2000 [13:30:44<45:50:16, 110.68s/it]

Training_loss 0.55301,   Accuracy 0.76853


 26%|██▌       | 510/2000 [13:32:33<45:34:52, 110.13s/it]

Training_loss 0.55297,   Accuracy 0.76854


 26%|██▌       | 511/2000 [13:34:21<45:16:19, 109.46s/it]

Training_loss 0.55293,   Accuracy 0.76842


 26%|██▌       | 512/2000 [13:36:08<44:53:39, 108.62s/it]

Training_loss 0.55293,   Accuracy 0.76857


 26%|██▌       | 513/2000 [13:38:00<45:22:13, 109.84s/it]

Training_loss 0.55289,   Accuracy 0.76865


 26%|██▌       | 514/2000 [13:39:46<44:47:24, 108.51s/it]

Training_loss 0.55286,   Accuracy 0.76864


 26%|██▌       | 515/2000 [13:41:38<45:11:34, 109.56s/it]

Training_loss 0.55282,   Accuracy 0.76864


 26%|██▌       | 516/2000 [13:43:36<46:10:50, 112.03s/it]

Training_loss 0.55279,   Accuracy 0.76867


 26%|██▌       | 517/2000 [13:45:42<47:59:37, 116.51s/it]

Training_loss 0.55273,   Accuracy 0.76866


 26%|██▌       | 518/2000 [13:47:38<47:46:51, 116.07s/it]

Training_loss 0.55268,   Accuracy 0.76866


 26%|██▌       | 519/2000 [13:49:26<46:49:22, 113.82s/it]

Training_loss 0.55266,   Accuracy 0.76848


 26%|██▌       | 520/2000 [13:51:13<45:54:12, 111.66s/it]

Training_loss 0.55261,   Accuracy 0.76864


 26%|██▌       | 521/2000 [13:52:59<45:15:49, 110.18s/it]

Training_loss 0.55256,   Accuracy 0.76863


 26%|██▌       | 522/2000 [13:54:47<44:52:48, 109.32s/it]

Training_loss 0.55252,   Accuracy 0.76865


 26%|██▌       | 523/2000 [13:56:32<44:24:15, 108.23s/it]

Training_loss 0.55246,   Accuracy 0.76860


 26%|██▌       | 524/2000 [13:58:21<44:25:15, 108.34s/it]

Training_loss 0.55244,   Accuracy 0.76874


 26%|██▋       | 525/2000 [14:00:09<44:23:31, 108.35s/it]

Training_loss 0.55240,   Accuracy 0.76872


 26%|██▋       | 526/2000 [14:01:54<43:57:38, 107.37s/it]

Training_loss 0.55234,   Accuracy 0.76875


 26%|██▋       | 527/2000 [14:03:38<43:27:37, 106.22s/it]

Training_loss 0.55230,   Accuracy 0.76883


 26%|██▋       | 528/2000 [14:05:26<43:41:11, 106.84s/it]

Training_loss 0.55226,   Accuracy 0.76870


 26%|██▋       | 529/2000 [14:07:22<44:45:29, 109.54s/it]

Training_loss 0.55220,   Accuracy 0.76873


 26%|██▋       | 530/2000 [14:09:13<44:54:39, 109.99s/it]

Training_loss 0.55213,   Accuracy 0.76884


 27%|██▋       | 531/2000 [14:11:00<44:28:41, 109.00s/it]

Training_loss 0.55213,   Accuracy 0.76885


 27%|██▋       | 532/2000 [14:12:54<45:04:21, 110.53s/it]

Training_loss 0.55209,   Accuracy 0.76888


 27%|██▋       | 533/2000 [14:14:57<46:36:52, 114.39s/it]

Training_loss 0.55207,   Accuracy 0.76890


 27%|██▋       | 534/2000 [14:16:54<46:51:41, 115.08s/it]

Training_loss 0.55205,   Accuracy 0.76890


 27%|██▋       | 535/2000 [14:18:50<46:56:53, 115.37s/it]

Training_loss 0.55200,   Accuracy 0.76889


 27%|██▋       | 536/2000 [14:20:46<47:02:29, 115.68s/it]

Training_loss 0.55197,   Accuracy 0.76894


 27%|██▋       | 537/2000 [14:22:41<46:52:11, 115.33s/it]

Training_loss 0.55193,   Accuracy 0.76896


 27%|██▋       | 538/2000 [14:24:32<46:19:55, 114.09s/it]

Training_loss 0.55187,   Accuracy 0.76901


 27%|██▋       | 539/2000 [14:26:20<45:29:15, 112.08s/it]

Training_loss 0.55186,   Accuracy 0.76902


 27%|██▋       | 540/2000 [14:28:09<45:10:53, 111.41s/it]

Training_loss 0.55181,   Accuracy 0.76901


 27%|██▋       | 541/2000 [14:29:56<44:31:36, 109.87s/it]

Training_loss 0.55180,   Accuracy 0.76908


 27%|██▋       | 542/2000 [14:31:48<44:46:46, 110.57s/it]

Training_loss 0.55176,   Accuracy 0.76903


 27%|██▋       | 543/2000 [14:33:35<44:22:07, 109.63s/it]

Training_loss 0.55172,   Accuracy 0.76909


 27%|██▋       | 544/2000 [14:35:22<44:00:54, 108.83s/it]

Training_loss 0.55171,   Accuracy 0.76907


 27%|██▋       | 545/2000 [14:37:13<44:10:23, 109.29s/it]

Training_loss 0.55162,   Accuracy 0.76909


 27%|██▋       | 546/2000 [14:39:00<43:52:41, 108.64s/it]

Training_loss 0.55156,   Accuracy 0.76914


 27%|██▋       | 547/2000 [14:40:48<43:46:17, 108.45s/it]

Training_loss 0.55153,   Accuracy 0.76910


 27%|██▋       | 548/2000 [14:42:51<45:28:08, 112.73s/it]

Training_loss 0.55150,   Accuracy 0.76914


 27%|██▋       | 549/2000 [14:44:52<46:30:14, 115.38s/it]

Training_loss 0.55145,   Accuracy 0.76914


 28%|██▊       | 550/2000 [14:45:09<34:35:00, 85.86s/it] 

Training_loss 0.55142,   Accuracy 0.76932


 28%|██▊       | 551/2000 [14:45:23<25:52:00, 64.27s/it]

Training_loss 0.55135,   Accuracy 0.76942


 28%|██▊       | 552/2000 [14:45:37<19:45:29, 49.12s/it]

Training_loss 0.55132,   Accuracy 0.76966


 28%|██▊       | 553/2000 [14:45:53<15:44:06, 39.15s/it]

Training_loss 0.55127,   Accuracy 0.76954


 28%|██▊       | 554/2000 [14:46:08<12:50:23, 31.97s/it]

Training_loss 0.55124,   Accuracy 0.76961


 28%|██▊       | 555/2000 [14:46:21<10:34:45, 26.36s/it]

Training_loss 0.55119,   Accuracy 0.76963


 28%|██▊       | 556/2000 [14:46:35<9:06:00, 22.69s/it] 

Training_loss 0.55115,   Accuracy 0.76978


 28%|██▊       | 557/2000 [14:46:50<8:08:58, 20.33s/it]

Training_loss 0.55110,   Accuracy 0.76957


 28%|██▊       | 558/2000 [14:47:05<7:29:27, 18.70s/it]

Training_loss 0.55108,   Accuracy 0.76976


 28%|██▊       | 559/2000 [14:47:19<6:57:53, 17.40s/it]

Training_loss 0.55103,   Accuracy 0.76958


 28%|██▊       | 560/2000 [14:47:34<6:39:24, 16.64s/it]

Training_loss 0.55098,   Accuracy 0.76959


 28%|██▊       | 561/2000 [14:47:51<6:41:39, 16.75s/it]

Training_loss 0.55095,   Accuracy 0.76953


 28%|██▊       | 562/2000 [14:48:13<7:14:58, 18.15s/it]

Training_loss 0.55091,   Accuracy 0.76950


 28%|██▊       | 563/2000 [14:48:35<7:47:13, 19.51s/it]

Training_loss 0.55089,   Accuracy 0.76953


 28%|██▊       | 564/2000 [14:48:54<7:39:02, 19.18s/it]

Training_loss 0.55087,   Accuracy 0.76963


 28%|██▊       | 565/2000 [14:49:14<7:48:27, 19.59s/it]

Training_loss 0.55087,   Accuracy 0.76979


 28%|██▊       | 566/2000 [14:49:35<7:59:06, 20.05s/it]

Training_loss 0.55085,   Accuracy 0.76986


 28%|██▊       | 567/2000 [14:49:54<7:49:58, 19.68s/it]

Training_loss 0.55081,   Accuracy 0.76982


 28%|██▊       | 568/2000 [14:50:14<7:50:55, 19.73s/it]

Training_loss 0.55074,   Accuracy 0.76966


 28%|██▊       | 569/2000 [14:50:41<8:41:18, 21.86s/it]

Training_loss 0.55067,   Accuracy 0.76973


 28%|██▊       | 570/2000 [14:51:13<9:51:36, 24.82s/it]

Training_loss 0.55063,   Accuracy 0.76980


 29%|██▊       | 571/2000 [14:51:37<9:49:11, 24.74s/it]

Training_loss 0.55058,   Accuracy 0.76981


 29%|██▊       | 572/2000 [14:52:03<9:53:33, 24.94s/it]

Training_loss 0.55053,   Accuracy 0.76982


 29%|██▊       | 573/2000 [14:52:31<10:14:49, 25.85s/it]

Training_loss 0.55049,   Accuracy 0.76967


 29%|██▊       | 574/2000 [14:52:55<10:07:39, 25.57s/it]

Training_loss 0.55045,   Accuracy 0.76959


 29%|██▉       | 575/2000 [14:53:19<9:52:37, 24.95s/it] 

Training_loss 0.55042,   Accuracy 0.76966


 29%|██▉       | 576/2000 [14:53:41<9:30:14, 24.03s/it]

Training_loss 0.55037,   Accuracy 0.76964


 29%|██▉       | 577/2000 [14:54:04<9:21:04, 23.66s/it]

Training_loss 0.55034,   Accuracy 0.76973


 29%|██▉       | 578/2000 [14:54:32<9:53:21, 25.04s/it]

Training_loss 0.55029,   Accuracy 0.76985


 29%|██▉       | 579/2000 [14:54:56<9:49:34, 24.89s/it]

Training_loss 0.55024,   Accuracy 0.76993


 29%|██▉       | 580/2000 [14:55:22<9:56:01, 25.18s/it]

Training_loss 0.55023,   Accuracy 0.76996


 29%|██▉       | 581/2000 [14:55:49<10:06:55, 25.66s/it]

Training_loss 0.55017,   Accuracy 0.77009


 29%|██▉       | 582/2000 [14:56:18<10:29:09, 26.62s/it]

Training_loss 0.55013,   Accuracy 0.76986


 29%|██▉       | 583/2000 [14:56:51<11:14:49, 28.57s/it]

Training_loss 0.55011,   Accuracy 0.76990


 29%|██▉       | 584/2000 [14:57:13<10:28:01, 26.61s/it]

Training_loss 0.55006,   Accuracy 0.76996


 29%|██▉       | 585/2000 [14:57:36<9:59:55, 25.44s/it] 

Training_loss 0.55003,   Accuracy 0.76998


 29%|██▉       | 586/2000 [14:58:00<9:48:30, 24.97s/it]

Training_loss 0.54998,   Accuracy 0.77001


 29%|██▉       | 587/2000 [14:58:20<9:17:20, 23.67s/it]

Training_loss 0.54997,   Accuracy 0.77000


 29%|██▉       | 588/2000 [14:58:42<9:00:59, 22.99s/it]

Training_loss 0.54994,   Accuracy 0.76999


 29%|██▉       | 589/2000 [14:58:59<8:18:50, 21.21s/it]

Training_loss 0.54993,   Accuracy 0.77019


 30%|██▉       | 590/2000 [14:59:19<8:14:39, 21.05s/it]

Training_loss 0.54990,   Accuracy 0.77018


 30%|██▉       | 591/2000 [14:59:41<8:15:10, 21.09s/it]

Training_loss 0.54986,   Accuracy 0.77038


 30%|██▉       | 592/2000 [15:00:10<9:13:32, 23.59s/it]

Training_loss 0.54983,   Accuracy 0.77025


 30%|██▉       | 593/2000 [15:00:36<9:31:29, 24.37s/it]

Training_loss 0.54980,   Accuracy 0.77028


 30%|██▉       | 594/2000 [15:01:02<9:39:19, 24.72s/it]

Training_loss 0.54978,   Accuracy 0.77051


 30%|██▉       | 595/2000 [15:01:25<9:31:10, 24.39s/it]

Training_loss 0.54976,   Accuracy 0.77039


 30%|██▉       | 596/2000 [15:01:43<8:44:14, 22.40s/it]

Training_loss 0.54971,   Accuracy 0.77031


 30%|██▉       | 597/2000 [15:02:03<8:25:16, 21.61s/it]

Training_loss 0.54967,   Accuracy 0.77042


 30%|██▉       | 598/2000 [15:02:21<7:57:51, 20.45s/it]

Training_loss 0.54961,   Accuracy 0.77023


 30%|██▉       | 599/2000 [15:02:43<8:07:20, 20.87s/it]

Training_loss 0.54955,   Accuracy 0.77033


 30%|███       | 600/2000 [15:03:10<8:52:24, 22.82s/it]

Training_loss 0.54952,   Accuracy 0.77040


 30%|███       | 601/2000 [15:03:29<8:23:44, 21.60s/it]

Training_loss 0.54948,   Accuracy 0.77045


 30%|███       | 602/2000 [15:03:50<8:19:52, 21.45s/it]

Training_loss 0.54944,   Accuracy 0.77047


 30%|███       | 603/2000 [15:04:12<8:26:36, 21.76s/it]

Training_loss 0.54939,   Accuracy 0.77049


 30%|███       | 604/2000 [15:04:32<8:10:27, 21.08s/it]

Training_loss 0.54936,   Accuracy 0.77048


 30%|███       | 605/2000 [15:04:50<7:52:39, 20.33s/it]

Training_loss 0.54935,   Accuracy 0.77034


 30%|███       | 606/2000 [15:05:07<7:28:24, 19.30s/it]

Training_loss 0.54931,   Accuracy 0.77044


 30%|███       | 607/2000 [15:05:27<7:31:31, 19.45s/it]

Training_loss 0.54928,   Accuracy 0.77036


 30%|███       | 608/2000 [15:05:42<7:03:42, 18.26s/it]

Training_loss 0.54924,   Accuracy 0.77039


 30%|███       | 609/2000 [15:06:01<7:03:10, 18.25s/it]

Training_loss 0.54921,   Accuracy 0.77030


 30%|███       | 610/2000 [15:06:17<6:47:49, 17.60s/it]

Training_loss 0.54916,   Accuracy 0.77036


 31%|███       | 611/2000 [15:06:33<6:38:55, 17.23s/it]

Training_loss 0.54913,   Accuracy 0.77036


 31%|███       | 612/2000 [15:06:54<7:01:44, 18.23s/it]

Training_loss 0.54914,   Accuracy 0.77043


 31%|███       | 613/2000 [15:07:08<6:34:25, 17.06s/it]

Training_loss 0.54910,   Accuracy 0.77058


 31%|███       | 614/2000 [15:07:24<6:23:37, 16.61s/it]

Training_loss 0.54908,   Accuracy 0.77047


 31%|███       | 615/2000 [15:07:39<6:17:53, 16.37s/it]

Training_loss 0.54904,   Accuracy 0.77043


 31%|███       | 616/2000 [15:07:59<6:37:38, 17.24s/it]

Training_loss 0.54903,   Accuracy 0.77061


 31%|███       | 617/2000 [15:08:15<6:30:39, 16.95s/it]

Training_loss 0.54900,   Accuracy 0.77063


 31%|███       | 618/2000 [15:08:33<6:34:33, 17.13s/it]

Training_loss 0.54900,   Accuracy 0.77058


 31%|███       | 619/2000 [15:08:53<6:55:06, 18.03s/it]

Training_loss 0.54894,   Accuracy 0.77070


 31%|███       | 620/2000 [15:09:16<7:31:49, 19.64s/it]

Training_loss 0.54891,   Accuracy 0.77064


 31%|███       | 621/2000 [15:09:41<8:04:58, 21.10s/it]

Training_loss 0.54886,   Accuracy 0.77063


 31%|███       | 622/2000 [15:10:02<8:08:22, 21.26s/it]

Training_loss 0.54883,   Accuracy 0.77057


 31%|███       | 623/2000 [15:10:20<7:42:44, 20.16s/it]

Training_loss 0.54882,   Accuracy 0.77068


 31%|███       | 624/2000 [15:10:40<7:40:30, 20.08s/it]

Training_loss 0.54879,   Accuracy 0.77068


 31%|███▏      | 625/2000 [15:11:02<7:56:25, 20.79s/it]

Training_loss 0.54879,   Accuracy 0.77086


 31%|███▏      | 626/2000 [15:11:21<7:44:00, 20.26s/it]

Training_loss 0.54876,   Accuracy 0.77083


 31%|███▏      | 627/2000 [15:11:43<7:52:46, 20.66s/it]

Training_loss 0.54872,   Accuracy 0.77083


 31%|███▏      | 628/2000 [15:12:04<7:54:20, 20.74s/it]

Training_loss 0.54868,   Accuracy 0.77081


 31%|███▏      | 629/2000 [15:12:23<7:46:32, 20.42s/it]

Training_loss 0.54864,   Accuracy 0.77097


 32%|███▏      | 630/2000 [15:12:39<7:16:16, 19.11s/it]

Training_loss 0.54865,   Accuracy 0.77098


 32%|███▏      | 631/2000 [15:12:57<7:05:28, 18.65s/it]

Training_loss 0.54861,   Accuracy 0.77103


 32%|███▏      | 632/2000 [15:13:13<6:43:59, 17.72s/it]

Training_loss 0.54854,   Accuracy 0.77087


 32%|███▏      | 633/2000 [15:13:28<6:27:06, 16.99s/it]

Training_loss 0.54849,   Accuracy 0.77091


 32%|███▏      | 634/2000 [15:13:43<6:14:52, 16.47s/it]

Training_loss 0.54851,   Accuracy 0.77088


 32%|███▏      | 635/2000 [15:13:58<6:06:00, 16.09s/it]

Training_loss 0.54846,   Accuracy 0.77104


 32%|███▏      | 636/2000 [15:14:21<6:53:18, 18.18s/it]

Training_loss 0.54841,   Accuracy 0.77094


 32%|███▏      | 637/2000 [15:14:44<7:20:45, 19.40s/it]

Training_loss 0.54841,   Accuracy 0.77099


 32%|███▏      | 638/2000 [15:14:59<6:51:23, 18.12s/it]

Training_loss 0.54839,   Accuracy 0.77100


 32%|███▏      | 639/2000 [15:15:15<6:35:56, 17.46s/it]

Training_loss 0.54836,   Accuracy 0.77095


 32%|███▏      | 640/2000 [15:15:30<6:22:08, 16.86s/it]

Training_loss 0.54835,   Accuracy 0.77094


 32%|███▏      | 641/2000 [15:15:52<6:58:08, 18.46s/it]

Training_loss 0.54834,   Accuracy 0.77091


 32%|███▏      | 642/2000 [15:16:18<7:48:36, 20.70s/it]

Training_loss 0.54834,   Accuracy 0.77108


 32%|███▏      | 643/2000 [15:16:39<7:49:27, 20.76s/it]

Training_loss 0.54830,   Accuracy 0.77104


 32%|███▏      | 644/2000 [15:17:02<8:05:34, 21.49s/it]

Training_loss 0.54826,   Accuracy 0.77107


 32%|███▏      | 645/2000 [15:17:23<7:59:14, 21.22s/it]

Training_loss 0.54822,   Accuracy 0.77106


 32%|███▏      | 646/2000 [15:17:39<7:23:40, 19.66s/it]

Training_loss 0.54818,   Accuracy 0.77104


 32%|███▏      | 647/2000 [15:17:55<6:58:49, 18.57s/it]

Training_loss 0.54818,   Accuracy 0.77102


 32%|███▏      | 648/2000 [15:18:19<7:35:36, 20.22s/it]

Training_loss 0.54813,   Accuracy 0.77108


 32%|███▏      | 649/2000 [15:18:42<7:51:33, 20.94s/it]

Training_loss 0.54809,   Accuracy 0.77117


 32%|███▎      | 650/2000 [15:18:59<7:28:56, 19.95s/it]

Training_loss 0.54805,   Accuracy 0.77115


 33%|███▎      | 651/2000 [15:19:14<6:50:07, 18.24s/it]

Training_loss 0.54801,   Accuracy 0.77121


 33%|███▎      | 652/2000 [15:19:29<6:29:19, 17.33s/it]

Training_loss 0.54798,   Accuracy 0.77110


 33%|███▎      | 653/2000 [15:19:48<6:41:05, 17.87s/it]

Training_loss 0.54793,   Accuracy 0.77111


 33%|███▎      | 654/2000 [15:20:05<6:36:15, 17.66s/it]

Training_loss 0.54790,   Accuracy 0.77124


 33%|███▎      | 655/2000 [15:20:24<6:42:02, 17.93s/it]

Training_loss 0.54786,   Accuracy 0.77122


 33%|███▎      | 656/2000 [15:20:41<6:34:59, 17.63s/it]

Training_loss 0.54781,   Accuracy 0.77118


 33%|███▎      | 657/2000 [15:21:04<7:11:33, 19.28s/it]

Training_loss 0.54779,   Accuracy 0.77113


 33%|███▎      | 658/2000 [15:21:21<6:58:32, 18.71s/it]

Training_loss 0.54778,   Accuracy 0.77130


 33%|███▎      | 658/2000 [15:21:29<31:19:23, 84.03s/it]


KeyboardInterrupt: 

In [20]:
#plot.plot(test_loss)
print(parameters_to_vector(models[19].parameters()), W[1])

NameError: name 'W' is not defined

In [21]:
test_loss = np.array(test_loss)
total_rel_error = np.array(total_rel_error)

in_cluster_proj_norm = np.array(in_cluster_proj_norm)
out_cluster_proj_norm = np.array(out_cluster_proj_norm)
in_cluster_proj_diff_norm = np.array(in_cluster_proj_diff_norm)
out_cluster_proj_diff_norm = np.array(out_cluster_proj_diff_norm)


In [22]:
'''
  0%|          | 1/2000 [00:12<6:59:49, 12.60s/it]
Training_loss 0.69317,   Accuracy 0.52177
  0%|          | 2/2000 [00:25<6:58:41, 12.57s/it]
Training_loss 0.69256,   Accuracy 0.52523
  0%|          | 3/2000 [00:37<6:55:30, 12.48s/it]
Training_loss 0.69282,   Accuracy 0.52400
  0%|          | 4/2000 [00:50<6:56:56, 12.53s/it]
Training_loss 0.69216,   Accuracy 0.52552
  0%|          | 5/2000 [01:05<7:35:34, 13.70s/it]
Training_loss 0.69178,   Accuracy 0.52765
  0%|          | 6/2000 [01:18<7:24:07, 13.36s/it]
Training_loss 0.69037,   Accuracy 0.55240
  0%|          | 7/2000 [01:32<7:32:02, 13.61s/it]
Training_loss 0.68986,   Accuracy 0.55653
  0%|          | 8/2000 [01:47<7:42:39, 13.94s/it]
Training_loss 0.68921,   Accuracy 0.56338
'''

'\n  0%|          | 1/2000 [00:12<6:59:49, 12.60s/it]\nTraining_loss 0.69317,   Accuracy 0.52177\n  0%|          | 2/2000 [00:25<6:58:41, 12.57s/it]\nTraining_loss 0.69256,   Accuracy 0.52523\n  0%|          | 3/2000 [00:37<6:55:30, 12.48s/it]\nTraining_loss 0.69282,   Accuracy 0.52400\n  0%|          | 4/2000 [00:50<6:56:56, 12.53s/it]\nTraining_loss 0.69216,   Accuracy 0.52552\n  0%|          | 5/2000 [01:05<7:35:34, 13.70s/it]\nTraining_loss 0.69178,   Accuracy 0.52765\n  0%|          | 6/2000 [01:18<7:24:07, 13.36s/it]\nTraining_loss 0.69037,   Accuracy 0.55240\n  0%|          | 7/2000 [01:32<7:32:02, 13.61s/it]\nTraining_loss 0.68986,   Accuracy 0.55653\n  0%|          | 8/2000 [01:47<7:42:39, 13.94s/it]\nTraining_loss 0.68921,   Accuracy 0.56338\n'

In [23]:
np.save( 'training_loss_sheave_fml' + str(lamda).replace('.', '_')+ '_pout' + str(pout).replace('.', '_'), test_loss)
np.save('relative_error_sheave_fml' + str(lamda).replace('.', '_')+ '_pout' + str(pout).replace('.', '_'), total_rel_error)
np.save( 'in_cluster_proj_norm_sheave_fml' + str(lamda).replace('.', '_')+ '_pout' + str(pout).replace('.', '_'), in_cluster_proj_norm)
np.save('out_cluster_proj_norm_sheave_fml' + str(lamda).replace('.', '_')+ '_pout' + str(pout).replace('.', '_'), out_cluster_proj_norm)
np.save( 'in_cluster_proj_diff_norm_sheave_fml' + str(lamda).replace('.', '_')+ '_pout' + str(pout).replace('.', '_'), in_cluster_proj_diff_norm)
np.save('out_cluster_proj_diff_norm_sheave_fml' + str(lamda).replace('.', '_')+ '_pout' + str(pout).replace('.', '_'), out_cluster_proj_diff_norm)

In [24]:
# task_loss dictionary
for key, value in task_loss.items():
    # Convert the list to a NumPy array
    array_loss = np.array(value)
    
    # Save the NumPy array using the specified format
    np.save('training_loss_sheave_fml_task' + key + '_' + str(lamda).replace('.', '_') + '_pout' + str(pout).replace('.', '_'), array_loss)

# task_rel_error dictionary
for key, value in task_rel_error.items():
    # Convert the list to a NumPy array
    array_rel_error = np.array(value)
    
    # Save the NumPy array using the specified format
    np.save('relative_error_sheave_fml_task' + key + '_' + str(lamda).replace('.', '_') + '_pout' + str(pout).replace('.', '_'), array_rel_error)