In [1]:
import sys
sys.path.insert(0, './../../../Models')
from sphere_points import generate_points

import numpy as np
np.random.seed(0)
import matplotlib.pyplot as plt

import torch
torch.manual_seed(0)
import torch.nn as nn
from tqdm import tqdm
from torch.optim import SGD
from torch.nn.functional import normalize, one_hot
# import torch.nn.functional as F
import torchvision
from torchvision import transforms
# from torch.optim.lr_scheduler import StepLR
from torch.utils.data import DataLoader

In [2]:
# Pick the compute device once for the whole notebook: CUDA when a GPU is
# visible to PyTorch, otherwise the CPU. Apple MPS is not considered here.
_backend_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_backend_name)
print(device)

cuda


In [3]:
def plot_losses(losses):
    """Draw one loss-vs-epoch panel per layer, side by side, and show the figure.

    Args:
        losses: 2-D array-like indexed as ``losses[layer][epoch]``.
    """
    loss_grid = np.array(losses)
    n_layers, n_epochs = loss_grid.shape
    # Epochs are reported 1-based on the x axis.
    epoch_axis = 1 + np.arange(n_epochs)

    plt.figure(figsize=(12, 5))
    for panel, layer_curve in enumerate(loss_grid, start=1):
        plt.subplot(1, n_layers, panel)
        plt.plot(epoch_axis, layer_curve)
        plt.title(f"Layer {panel} Loss")
        plt.xlabel("Epochs")
        plt.ylabel("Loss")
    plt.tight_layout()
    plt.show()

In [4]:
# model_loss = nn.CrossEntropyLoss()
# Number of target classes (the MNIST digits 0-9). This is a module-level
# global that code in the cells below reads directly.
num_classes = 10

In [5]:
def classifier_head_train(inp_embedding, classifier_weights, labels, theta=1):
    """Alignment loss between embeddings and the direction of their true class.

    Every embedding row is L2-normalised and projected onto each column of
    ``classifier_weights``. Only the projection onto the column selected by the
    sample's label contributes; the loss is ``mean(log(2 - theta * s))`` where
    ``s`` is that projection.

    The target-class projection is picked with ``gather``, so the function no
    longer depends on the notebook-level ``num_classes`` global: the number of
    classes is implied by the number of columns in ``classifier_weights``.

    Args:
        inp_embedding: 2-D float tensor, one embedding per row.
        classifier_weights: 2-D float tensor with one column per class. When
            the columns have unit norm, ``s`` is a cosine similarity in [-1, 1].
        labels: 1-D int64 tensor of class indices, one per embedding row.
        theta: Scale applied to the similarity (default 1, the previously
            hard-coded value). ``2 - theta * s`` must stay positive for the
            logarithm to be finite; with unit-norm columns that means
            ``theta < 2``.

    Returns:
        Scalar tensor holding the mean loss over the batch.
    """
    inp_embedding = normalize(inp_embedding, p=2, dim=-1)
    classifier_output = torch.mm(inp_embedding, classifier_weights)
    # Pick, for each row, the similarity to that row's own class direction.
    target_similarity = classifier_output.gather(1, labels.view(-1, 1)).squeeze(1)
    return torch.mean(torch.log(2 - (theta * target_similarity)))

In [6]:
def classifier_head(inp_embedding, classifier_weights, labels):
    """Predict a class index for every embedding row.

    Rows are L2-normalised, projected onto the columns of
    ``classifier_weights``, and the column with the largest projection wins.

    Args:
        inp_embedding: 2-D float tensor, one embedding per row.
        classifier_weights: 2-D float tensor with one column per class.
        labels: Accepted for signature parity with the training head; unused.

    Returns:
        Plain Python list with one predicted class index per row.
    """
    unit_rows = normalize(inp_embedding, p=2, dim=-1)
    class_scores = unit_rows.mm(classifier_weights)
    winners = class_scores.argmax(dim=1)
    return winners.tolist()

In [7]:
# NumPy copy of the class directions generated by the most recently constructed
# Layer; overwritten every time a Layer is built.
initial = None

# Data dimension
# (num_data, num_features) => no dimension for batch size please
class Layer(nn.Linear):
    """Linear layer trained on its own against fixed per-class directions.

    The layer applies an affine map followed by LeakyReLU. Its output is
    compared with a fixed (non-trainable) matrix of class directions via
    ``classifier_head_train`` for the loss and ``classifier_head`` for
    predictions. Each call to ``train`` runs one plain-SGD step on this layer's
    own parameters only.

    NOTE: ``train(x, labels)`` shadows ``nn.Module.train(mode)``. Do not call
    ``.eval()`` or ``.train(True/False)`` on instances; those module-mode
    switches route through this method and will fail.

    Args:
        in_features: Width of the input rows.
        out_features: Width of the output rows; also the dimension of the
            class directions.
        bias: Whether the affine map has a bias term.
        device: Device for the parameters and the direction matrix.
        lr: Learning rate used for the SGD step. Read on every ``train`` call,
            so callers may change ``self.lr`` between steps.
        apply_dropout: When true, ``train`` applies dropout (p=0.1) to its
            input before the affine map.
    """

    # Negative slope of the LeakyReLU applied after the affine map.
    NEGATIVE_SLOPE = 0.001

    def __init__(self, in_features, out_features, bias, device, lr, apply_dropout=False):
        super().__init__(in_features, out_features, bias, device)
        self.out_features = out_features
        self.bias_flag = bias
        self.lr = lr
        # Taken from the notebook-level global at construction time.
        self.num_classes = num_classes
        self.dimension = out_features

        # Re-initialise the weights uniformly in +/- sqrt(6 / fan_in).
        fc1_limit = np.sqrt(6.0 / in_features)
        torch.nn.init.uniform_(self.weight, a=-fc1_limit, b=fc1_limit)

        self.dropout = nn.Dropout(0.1)
        self.apply_dropout = apply_dropout

        global initial
        # generate_points comes from the project's sphere_points module.
        # NOTE(review): presumably it returns num_classes points of length
        # `dimension`, one per class -- confirm against that module.
        points = generate_points(self.num_classes, self.dimension, steps = 10000)
        initial = np.array(points)
        self.directions = [torch.tensor(t, dtype = torch.float32).to(device) for t in points]
        # One unit-norm column per class; built from tensors that do not
        # require grad, so the matrix is a constant with respect to training.
        self.direction_weights = torch.stack(
            [normalize(direction, p = 2, dim=-1) for direction in self.directions], dim=1
        )

    def _activate(self, x):
        """Affine map (with bias when enabled) followed by LeakyReLU."""
        pre_activation = torch.mm(x, self.weight.T)
        if self.bias_flag:
            pre_activation = pre_activation + self.bias.unsqueeze(0)
        return nn.functional.leaky_relu(pre_activation, negative_slope=self.NEGATIVE_SLOPE)

    def train(self, x, labels):
        """Run one local SGD step on a batch.

        Args:
            x: 2-D input tensor, one sample per row.
            labels: Class indices for the rows of ``x``.

        Returns:
            ``(loss, y)`` where ``loss`` is a Python float and ``y`` is the
            layer's activation for the batch, computed before the weight
            update and still attached to the autograd graph.
        """
        if self.apply_dropout:
            x = self.dropout(x)
        # A fresh optimizer per step picks up the current self.lr; plain SGD
        # keeps no state between steps, so nothing is lost by rebuilding it.
        opt = SGD(self.parameters(), lr=self.lr)
        y = self._activate(x)
        loss = classifier_head_train(y, self.direction_weights, labels)

        opt.zero_grad(set_to_none=True)
        loss.backward(retain_graph = False)
        opt.step()

        return loss.detach().item(), y

    def test(self, x, labels):
        """Predict classes for a batch without tracking gradients.

        Args:
            x: 2-D input tensor, one sample per row.
            labels: Forwarded to ``classifier_head``.

        Returns:
            ``(predictions, y)`` where ``predictions`` is a tensor of class
            indices on the same device as ``y``, and ``y`` is the layer's
            activation for the batch.
        """
        with torch.no_grad():
            y = self._activate(x)
        max_idx_list = classifier_head(y, self.direction_weights, labels)
        # Place predictions next to the activations rather than on whatever
        # the notebook-level `device` global happens to be.
        return torch.tensor(max_idx_list, device=y.device), y

In [8]:
class Net(nn.Module):
    """Stack of independently trained ``Layer`` objects.

    Each layer receives the detached output of the previous one, so no
    gradient flows between layers; every layer performs its own update.

    NOTE: ``train(train_loader, test_loader)`` shadows
    ``nn.Module.train(mode)``. Do not call ``.eval()`` or ``.train(True/False)``
    on instances. The layers are kept in a plain Python list, so they are not
    registered as sub-modules of this module.

    Args:
        dims_list: Layer widths; consecutive pairs define one layer each.
        bias: Whether each layer has a bias term.
        epochs: Number of passes over the training loader.
        lr: Initial learning rate handed to every layer.
        device: Device the layers live on and batches are moved to.
    """

    # Every LR_DECAY_EVERY epochs each layer's lr is reduced by LR_DECAY_STEP.
    # The decay is subtractive, so a long enough run would drive lr to zero or
    # below; the configuration in this notebook stays positive.
    LR_DECAY_EVERY = 10
    LR_DECAY_STEP = 0.1

    def __init__(self, dims_list, bias, epochs, lr, device):
        super().__init__()
        self.dims_list = dims_list
        self.bias = bias
        self.epochs = epochs
        self.lr = lr
        self.device = device
        self.layers = []
        n_layers = len(self.dims_list) - 1
        for d in range(n_layers):
            print(f"Initialization {d + 1} / {n_layers}")
            self.layers += [Layer(self.dims_list[d], self.dims_list[d + 1], self.bias, self.device, self.lr)]
            print("Complete\n")

    def _record_metrics(self, train_loader, test_loader, layer_w, acc_train, acc_test):
        """Append current weight norms and train/test accuracies to the histories."""
        with torch.no_grad():
            for i, layer in enumerate(self.layers):
                layer_w[i].append(torch.norm(layer.weight, p=2).item())
            acc_train.append(self.test(train_loader))
            acc_test.append(self.test(test_loader))

    def train(self, train_loader, test_loader):
        """Train every layer for ``self.epochs`` epochs, tracking metrics.

        Metrics are recorded once before training and once after every epoch.

        Args:
            train_loader: Iterable of ``(inputs, labels)`` batches with a
                length; used for training and for train accuracy.
            test_loader: Iterable of batches used for test accuracy.

        Returns:
            ``[layer_loss_list, acc_train, acc_test, layer_w]`` where
            ``layer_loss_list[i]`` holds the mean loss of layer ``i`` per
            epoch, ``acc_train`` / ``acc_test`` hold one ``test()`` result per
            evaluation, and ``layer_w[i]`` holds the weight norm of layer ``i``
            per evaluation.
        """
        n_layers = len(self.layers)
        layer_loss_list = [[] for _ in range(n_layers)]
        acc_train = []
        acc_test = []
        layer_w = [[] for _ in range(len(self.dims_list) - 1)]

        pbar = tqdm(total = self.epochs * len(train_loader) * n_layers,
                    desc = "Training", position = 0, leave = True)
        try:
            # Baseline before any update. Uses this instance and the loaders
            # passed in, not notebook-level globals.
            self._record_metrics(train_loader, test_loader, layer_w, acc_train, acc_test)

            for epoch in range(self.epochs):

                if epoch and not (epoch % self.LR_DECAY_EVERY):
                    # learning rate decay
                    for layer in self.layers:
                        layer.lr = layer.lr - self.LR_DECAY_STEP
                        print('lr decreased to ', layer.lr)

                loss_agg = [0] * n_layers
                for dat in train_loader:
                    x, label = dat
                    x = x.to(self.device)
                    label = label.to(self.device)
                    for i, layer in enumerate(self.layers):
                        loss, y = layer.train(x, label)
                        layer.zero_grad(set_to_none=True)
                        # Detach so the next layer cannot backpropagate into this one.
                        x = y.detach()
                        loss_agg[i] += loss / len(train_loader)
                        del y
                        pbar.update(1)
                pbar.set_postfix(epoch = epoch + 1, loss = loss_agg)
                for i in range(n_layers):
                    layer_loss_list[i].append(loss_agg[i])

                self._record_metrics(train_loader, test_loader, layer_w, acc_train, acc_test)
        finally:
            # Release the progress bar even if training is interrupted.
            pbar.close()
        return [layer_loss_list, acc_train, acc_test, layer_w]

    def test(self, data_loader):
        """Measure the accuracy of every layer's predictions on a loader.

        Args:
            data_loader: Iterable of batches whose first two items are the
                inputs and the labels.

        Returns:
            A one-element list holding a NumPy array with one accuracy per
            layer (fraction of correctly predicted samples).
        """
        n_layers = len(self.layers)
        correct = [0] * n_layers
        total = [0] * n_layers
        for dat in data_loader:
            x = dat[0].to(self.device)
            label = dat[1].to(self.device)
            for i, layer in enumerate(self.layers):
                # Each layer predicts from, and passes on, its own activation.
                pred, x = layer.test(x, label)
                correct[i] += (pred == label).sum().item()
                total[i] += label.shape[0]

        # Every layer sees the same samples, so all totals are equal.
        return [np.array(correct) / total[-1]]

In [9]:
def _flatten_image(x):
    """Collapse all axes after the first into one, then drop size-1 axes."""
    return x.view(x.size(0), -1).squeeze()


# ToTensor scales pixel values to [0, 1]; the image is then flattened.
# No further normalisation (e.g. transforms.Normalize) is applied.
flatten_transform = transforms.Lambda(_flatten_image)
transform = transforms.Compose([transforms.ToTensor(), flatten_transform])

# Both MNIST splits share the same storage root and preprocessing.
_mnist_options = dict(root='./../../../Data', download=True, transform=transform)
trainset = torchvision.datasets.MNIST(train=True, **_mnist_options)
testset = torchvision.datasets.MNIST(train=False, **_mnist_options)

# Neither loader shuffles, so batches arrive in dataset order on every epoch.
batch_size = 50
trainloader, testloader = (
    torch.utils.data.DataLoader(split, batch_size=batch_size, shuffle=False)
    for split in (trainset, testset)
)


# full_data_set = torch.utils.data.ConcatDataset([trainset, testset])

In [10]:
# `ratio` is left over from an earlier random train/test split experiment and
# is not used by the code in this cell.
ratio = [0.2]

# Per-run histories; each list receives one entry per trained network.
loss, train_acc, test_acc, w_layers = [], [], [], []

output_layer_size = [1024]
num_runs = 5

# Hyper-parameters shared by every run.
bias = True
epochs = 200
lr = 2.5
num_classes = 10

for l2_size in output_layer_size:
    # 784 inputs, eight hidden layers of width 1024, then the final layer.
    dims_list = [784] + [1024] * 8 + [l2_size]
    for _ in range(num_runs):
        net = Net(dims_list, bias, epochs, lr, device)

        # Despite its name, `layer_loss_list` is the full result of train():
        # [per-layer losses, train accuracies, test accuracies, weight norms].
        layer_loss_list = net.train(trainloader, testloader)

        histories = (loss, train_acc, test_acc, w_layers)
        for history, run_result in zip(histories, layer_loss_list):
            history.append(run_result)

Initialization 1 / 9
Complete

Initialization 2 / 9
Complete

Initialization 3 / 9
Complete

Initialization 4 / 9
Complete

Initialization 5 / 9
Complete

Initialization 6 / 9
Complete

Initialization 7 / 9
Complete

Initialization 8 / 9
Complete

Initialization 9 / 9
Complete



Training:   5%| | 108229/2160000 [03:58<14:39:29, 38.88it/s, epoch=10, loss=[0.3582771361867589, 0.2981972372035182, 0.2952516698588928, 0.28905420338114113, 0.2914081706106662, 0.2857880684981746, 0.2912

lr decreased to  2.4
lr decreased to  2.4
lr decreased to  2.4
lr decreased to  2.4
lr decreased to  2.4
lr decreased to  2.4
lr decreased to  2.4
lr decreased to  2.4
lr decreased to  2.4


Training:  10%| | 216127/2160000 [07:54<16:34:43, 32.57it/s, epoch=20, loss=[0.3526476955413817, 0.29080683658520345, 0.28452212286492207, 0.27954770406087237, 0.27809948215881974, 0.275966898947954, 0.28

lr decreased to  2.3
lr decreased to  2.3
lr decreased to  2.3
lr decreased to  2.3
lr decreased to  2.3
lr decreased to  2.3
lr decreased to  2.3
lr decreased to  2.3
lr decreased to  2.3


Training:  15%|▏| 324135/2160000 [11:51<12:55:09, 39.47it/s, epoch=30, loss=[0.3498916897674403, 0.28707348364094887, 0.28064168875416107, 0.2758496234814325, 0.2735936849812665, 0.2725363573680323, 0.279

lr decreased to  2.1999999999999997
lr decreased to  2.1999999999999997
lr decreased to  2.1999999999999997
lr decreased to  2.1999999999999997
lr decreased to  2.1999999999999997
lr decreased to  2.1999999999999997
lr decreased to  2.1999999999999997
lr decreased to  2.1999999999999997
lr decreased to  2.1999999999999997


Training:  20%|▏| 432145/2160000 [15:47<14:43:10, 32.61it/s, epoch=40, loss=[0.3487006595482428, 0.2852154844254255, 0.27869465418159944, 0.27399173900484997, 0.2711182181785506, 0.2710026604185501, 0.277

lr decreased to  2.0999999999999996
lr decreased to  2.0999999999999996
lr decreased to  2.0999999999999996
lr decreased to  2.0999999999999996
lr decreased to  2.0999999999999996
lr decreased to  2.0999999999999996
lr decreased to  2.0999999999999996
lr decreased to  2.0999999999999996
lr decreased to  2.0999999999999996


Training:  25%|▎| 540127/2160000 [19:42<10:47:12, 41.71it/s, epoch=50, loss=[0.3479875475913285, 0.2838602045178415, 0.2771272584050893, 0.2726467376699049, 0.2697069849570592, 0.2699516468991836, 0.27587

lr decreased to  1.9999999999999996
lr decreased to  1.9999999999999996
lr decreased to  1.9999999999999996
lr decreased to  1.9999999999999996
lr decreased to  1.9999999999999996
lr decreased to  1.9999999999999996
lr decreased to  1.9999999999999996
lr decreased to  1.9999999999999996
lr decreased to  1.9999999999999996


Training:  30%|▎| 648190/2160000 [23:37<10:42:00, 39.25it/s, epoch=60, loss=[0.3474935995290679, 0.282859401702881, 0.2760914461811383, 0.2713897527754304, 0.2674100728084648, 0.268984115148584, 0.2743860

lr decreased to  1.8999999999999995
lr decreased to  1.8999999999999995
lr decreased to  1.8999999999999995
lr decreased to  1.8999999999999995
lr decreased to  1.8999999999999995
lr decreased to  1.8999999999999995
lr decreased to  1.8999999999999995
lr decreased to  1.8999999999999995
lr decreased to  1.8999999999999995


Training:  35%|▎| 756100/2160000 [27:31<12:12:32, 31.94it/s, epoch=70, loss=[0.3471219005435705, 0.2821023683498302, 0.27533705110351236, 0.2707260215530791, 0.2671624277904631, 0.26817021034657934, 0.272

lr decreased to  1.7999999999999994
lr decreased to  1.7999999999999994
lr decreased to  1.7999999999999994
lr decreased to  1.7999999999999994
lr decreased to  1.7999999999999994
lr decreased to  1.7999999999999994
lr decreased to  1.7999999999999994
lr decreased to  1.7999999999999994
lr decreased to  1.7999999999999994


Training:  40%|▍| 864086/2160000 [31:27<13:43:08, 26.24it/s, epoch=80, loss=[0.3468189432471994, 0.2815253578871489, 0.27471917301416415, 0.27051846986015626, 0.26582753911614426, 0.267477544446786, 0.271

lr decreased to  1.6999999999999993
lr decreased to  1.6999999999999993
lr decreased to  1.6999999999999993
lr decreased to  1.6999999999999993
lr decreased to  1.6999999999999993
lr decreased to  1.6999999999999993
lr decreased to  1.6999999999999993
lr decreased to  1.6999999999999993
lr decreased to  1.6999999999999993


Training:  45%|▍| 972136/2160000 [35:23<9:24:59, 35.04it/s, epoch=90, loss=[0.3465479385107754, 0.28108280912041667, 0.2741787897298732, 0.27115266347924877, 0.26507415786385524, 0.2669264523933332, 0.270

lr decreased to  1.5999999999999992
lr decreased to  1.5999999999999992
lr decreased to  1.5999999999999992
lr decreased to  1.5999999999999992
lr decreased to  1.5999999999999992
lr decreased to  1.5999999999999992
lr decreased to  1.5999999999999992
lr decreased to  1.5999999999999992
lr decreased to  1.5999999999999992


Training:  50%|▌| 1080133/2160000 [39:20<7:55:49, 37.82it/s, epoch=100, loss=[0.3463303426901498, 0.2806465741991995, 0.2737176952511068, 0.31211357139050994, 0.2665514078612126, 0.2680282144000128, 0.271

lr decreased to  1.4999999999999991
lr decreased to  1.4999999999999991
lr decreased to  1.4999999999999991
lr decreased to  1.4999999999999991
lr decreased to  1.4999999999999991
lr decreased to  1.4999999999999991
lr decreased to  1.4999999999999991
lr decreased to  1.4999999999999991
lr decreased to  1.4999999999999991


Training:  55%|▌| 1188164/2160000 [43:19<6:45:58, 39.90it/s, epoch=110, loss=[0.3461486709366237, 0.2802855263153712, 0.2733182868361472, 0.2913275257498027, 0.3577369803438583, 0.2701649942000709, 0.2736

lr decreased to  1.399999999999999
lr decreased to  1.399999999999999
lr decreased to  1.399999999999999
lr decreased to  1.399999999999999
lr decreased to  1.399999999999999
lr decreased to  1.399999999999999
lr decreased to  1.399999999999999
lr decreased to  1.399999999999999
lr decreased to  1.399999999999999


Training:  60%|▌| 1296186/2160000 [47:16<6:06:27, 39.29it/s, epoch=120, loss=[0.3459890516847369, 0.27998063482344177, 0.272969450553258, 0.2970820894589025, 0.3194327277441821, 0.2680695880204441, 0.2904

lr decreased to  1.299999999999999
lr decreased to  1.299999999999999
lr decreased to  1.299999999999999
lr decreased to  1.299999999999999
lr decreased to  1.299999999999999
lr decreased to  1.299999999999999
lr decreased to  1.299999999999999
lr decreased to  1.299999999999999
lr decreased to  1.299999999999999


Training:  65%|▋| 1404079/2160000 [51:13<6:34:45, 31.92it/s, epoch=130, loss=[0.34584594137966657, 0.27972333267331095, 0.2726690336565175, 0.2861143037676808, 0.3080951032787561, 0.26697191677987575, 0.3

lr decreased to  1.1999999999999988
lr decreased to  1.1999999999999988
lr decreased to  1.1999999999999988
lr decreased to  1.1999999999999988
lr decreased to  1.1999999999999988
lr decreased to  1.1999999999999988
lr decreased to  1.1999999999999988
lr decreased to  1.1999999999999988
lr decreased to  1.1999999999999988


Training:  70%|▋| 1512169/2160000 [55:07<4:13:25, 42.61it/s, epoch=140, loss=[0.3457153404255709, 0.2794998166710136, 0.2724039135376612, 0.2812645929306743, 0.3005561882009106, 0.26727024540305144, 0.338

lr decreased to  1.0999999999999988
lr decreased to  1.0999999999999988
lr decreased to  1.0999999999999988
lr decreased to  1.0999999999999988
lr decreased to  1.0999999999999988
lr decreased to  1.0999999999999988
lr decreased to  1.0999999999999988
lr decreased to  1.0999999999999988
lr decreased to  1.0999999999999988


Training:  75%|▊| 1620124/2160000 [59:01<4:30:38, 33.25it/s, epoch=150, loss=[0.34559532972673623, 0.27929574931661294, 0.272162964095672, 0.27901180172959966, 0.29605126909911733, 0.26984430248538643, 0.

lr decreased to  0.9999999999999988
lr decreased to  0.9999999999999988
lr decreased to  0.9999999999999988
lr decreased to  0.9999999999999988
lr decreased to  0.9999999999999988
lr decreased to  0.9999999999999988
lr decreased to  0.9999999999999988
lr decreased to  0.9999999999999988
lr decreased to  0.9999999999999988


Training:  80%|▊| 1728120/2160000 [1:02:58<4:11:31, 28.62it/s, epoch=160, loss=[0.34548386680583143, 0.27911412899692856, 0.27194540873169887, 0.2770994600405294, 0.2927315481752163, 0.28728916560610146, 

lr decreased to  0.8999999999999988
lr decreased to  0.8999999999999988
lr decreased to  0.8999999999999988
lr decreased to  0.8999999999999988
lr decreased to  0.8999999999999988
lr decreased to  0.8999999999999988
lr decreased to  0.8999999999999988
lr decreased to  0.8999999999999988
lr decreased to  0.8999999999999988


Training:  85%|▊| 1836130/2160000 [1:06:55<2:37:50, 34.20it/s, epoch=170, loss=[0.3453804140786331, 0.2789554206281896, 0.27175373295942895, 0.27568028129637284, 0.28999369874596626, 0.28116049843529833, 

lr decreased to  0.7999999999999988
lr decreased to  0.7999999999999988
lr decreased to  0.7999999999999988
lr decreased to  0.7999999999999988
lr decreased to  0.7999999999999988
lr decreased to  0.7999999999999988
lr decreased to  0.7999999999999988
lr decreased to  0.7999999999999988
lr decreased to  0.7999999999999988


Training:  90%|▉| 1944127/2160000 [1:10:54<1:38:33, 36.51it/s, epoch=180, loss=[0.3452822294582921, 0.27881380466123473, 0.27158264157672707, 0.27432434223592206, 0.28767063461244147, 0.2772452251116433, 

lr decreased to  0.6999999999999988
lr decreased to  0.6999999999999988
lr decreased to  0.6999999999999988
lr decreased to  0.6999999999999988
lr decreased to  0.6999999999999988
lr decreased to  0.6999999999999988
lr decreased to  0.6999999999999988
lr decreased to  0.6999999999999988
lr decreased to  0.6999999999999988


Training:  95%|▉| 2052147/2160000 [1:14:57<54:33, 32.95it/s, epoch=190, loss=[0.3451901524513961, 0.27868344232440034, 0.27143353447318025, 0.2732185918589439, 0.28547354509433126, 0.27430977972845216, 0.

lr decreased to  0.5999999999999989
lr decreased to  0.5999999999999989
lr decreased to  0.5999999999999989
lr decreased to  0.5999999999999989
lr decreased to  0.5999999999999989
lr decreased to  0.5999999999999989
lr decreased to  0.5999999999999989
lr decreased to  0.5999999999999989
lr decreased to  0.5999999999999989


Training: 100%|█| 2160000/2160000 [1:19:02<00:00, 455.44it/s, epoch=200, loss=[0.34510236702859365, 0.2785701731592416, 0.271306706046064, 0.27220797682801945, 0.28329481042921545, 0.27308677529295294, 0.


Initialization 1 / 9
Complete

Initialization 2 / 9
Complete

Initialization 3 / 9
Complete

Initialization 4 / 9
Complete

Initialization 5 / 9
Complete

Initialization 6 / 9
Complete

Initialization 7 / 9
Complete

Initialization 8 / 9
Complete

Initialization 9 / 9
Complete



Training:   5%| | 108134/2160000 [04:12<13:52:33, 41.08it/s, epoch=10, loss=[0.35616555377840964, 0.2990906042108932, 0.29042550029854036, 0.288869671101371, 0.2860350266098975, 0.2868621599674221, 0.2867

lr decreased to  2.4
lr decreased to  2.4
lr decreased to  2.4
lr decreased to  2.4
lr decreased to  2.4
lr decreased to  2.4
lr decreased to  2.4
lr decreased to  2.4
lr decreased to  2.4


Training:  10%| | 216102/2160000 [08:11<18:21:51, 29.40it/s, epoch=20, loss=[0.35056382810076103, 0.29062934045990274, 0.2805348734557628, 0.27967773521939887, 0.277754253819585, 0.2758049309502041, 0.277

lr decreased to  2.3
lr decreased to  2.3
lr decreased to  2.3
lr decreased to  2.3
lr decreased to  2.3
lr decreased to  2.3
lr decreased to  2.3
lr decreased to  2.3
lr decreased to  2.3


Training:  15%|▏| 324127/2160000 [12:07<14:13:37, 35.84it/s, epoch=30, loss=[0.3485266398638485, 0.2870819883793587, 0.27726052304108956, 0.2757582181195416, 0.27362672125299775, 0.27168840023378565, 0.27

lr decreased to  2.1999999999999997
lr decreased to  2.1999999999999997
lr decreased to  2.1999999999999997
lr decreased to  2.1999999999999997
lr decreased to  2.1999999999999997
lr decreased to  2.1999999999999997
lr decreased to  2.1999999999999997
lr decreased to  2.1999999999999997
lr decreased to  2.1999999999999997


Training:  20%|▏| 432105/2160000 [16:04<15:20:25, 31.29it/s, epoch=40, loss=[0.34712170779705065, 0.2852504426985981, 0.27541094973683383, 0.2737602559228736, 0.27069710132976366, 0.2706219512720904, 0.27

lr decreased to  2.0999999999999996
lr decreased to  2.0999999999999996
lr decreased to  2.0999999999999996
lr decreased to  2.0999999999999996
lr decreased to  2.0999999999999996
lr decreased to  2.0999999999999996
lr decreased to  2.0999999999999996
lr decreased to  2.0999999999999996
lr decreased to  2.0999999999999996


Training:  25%|▎| 540123/2160000 [19:59<11:34:49, 38.86it/s, epoch=50, loss=[0.3463883703450362, 0.2839300082375608, 0.27409104575713467, 0.27242885234455266, 0.26859024755656713, 0.2694690300772584, 0.27

lr decreased to  1.9999999999999996
lr decreased to  1.9999999999999996
lr decreased to  1.9999999999999996
lr decreased to  1.9999999999999996
lr decreased to  1.9999999999999996
lr decreased to  1.9999999999999996
lr decreased to  1.9999999999999996
lr decreased to  1.9999999999999996
lr decreased to  1.9999999999999996


Training:  30%|▎| 648242/2160000 [23:53<9:56:42, 42.22it/s, epoch=60, loss=[0.3456778583427266, 0.2829901715119677, 0.2731543499231335, 0.27139982007443975, 0.2666577510908244, 0.26857603435715044, 0.2697

lr decreased to  1.8999999999999995
lr decreased to  1.8999999999999995
lr decreased to  1.8999999999999995
lr decreased to  1.8999999999999995
lr decreased to  1.8999999999999995
lr decreased to  1.8999999999999995
lr decreased to  1.8999999999999995
lr decreased to  1.8999999999999995
lr decreased to  1.8999999999999995


Training:  35%|▎| 756126/2160000 [27:47<11:06:09, 35.12it/s, epoch=70, loss=[0.34527606196701555, 0.2822496216247484, 0.2724005655695995, 0.270277276833852, 0.26549964842697005, 0.2677115209152301, 0.2681

lr decreased to  1.7999999999999994
lr decreased to  1.7999999999999994
lr decreased to  1.7999999999999994
lr decreased to  1.7999999999999994
lr decreased to  1.7999999999999994
lr decreased to  1.7999999999999994
lr decreased to  1.7999999999999994
lr decreased to  1.7999999999999994
lr decreased to  1.7999999999999994


Training:  40%|▍| 864233/2160000 [31:43<9:11:39, 39.15it/s, epoch=80, loss=[0.3449724538624287, 0.28166915237903595, 0.2717636602123577, 0.26976903185248374, 0.264394723189374, 0.2669484368463355, 0.26690

lr decreased to  1.6999999999999993
lr decreased to  1.6999999999999993
lr decreased to  1.6999999999999993
lr decreased to  1.6999999999999993
lr decreased to  1.6999999999999993
lr decreased to  1.6999999999999993
lr decreased to  1.6999999999999993
lr decreased to  1.6999999999999993
lr decreased to  1.6999999999999993


Training:  45%|▍| 972125/2160000 [35:41<9:31:51, 34.62it/s, epoch=90, loss=[0.344726005295912, 0.28117589679857086, 0.27123094094296274, 0.27083627939224275, 0.26385612461715946, 0.26629099915424964, 0.26

lr decreased to  1.5999999999999992
lr decreased to  1.5999999999999992
lr decreased to  1.5999999999999992
lr decreased to  1.5999999999999992
lr decreased to  1.5999999999999992
lr decreased to  1.5999999999999992
lr decreased to  1.5999999999999992
lr decreased to  1.5999999999999992
lr decreased to  1.5999999999999992


Training:  50%|▌| 1080201/2160000 [39:18<5:11:25, 57.79it/s, epoch=100, loss=[0.3445184005548559, 0.28078019147117933, 0.27077374413609506, 0.2713398892680802, 0.26379240803420534, 0.2664169143637019, 0.2

lr decreased to  1.4999999999999991
lr decreased to  1.4999999999999991
lr decreased to  1.4999999999999991
lr decreased to  1.4999999999999991
lr decreased to  1.4999999999999991
lr decreased to  1.4999999999999991
lr decreased to  1.4999999999999991
lr decreased to  1.4999999999999991
lr decreased to  1.4999999999999991


Training:  55%|▌| 1188269/2160000 [42:13<3:27:38, 78.00it/s, epoch=110, loss=[0.34433753001193185, 0.2804532517989479, 0.2703820102910199, 0.27049526321391326, 0.26358931849400197, 0.26590463829537225, 0.

lr decreased to  1.399999999999999
lr decreased to  1.399999999999999
lr decreased to  1.399999999999999
lr decreased to  1.399999999999999
lr decreased to  1.399999999999999
lr decreased to  1.399999999999999
lr decreased to  1.399999999999999
lr decreased to  1.399999999999999
lr decreased to  1.399999999999999


Training:  60%|▌| 1296262/2160000 [45:00<3:20:21, 71.85it/s, epoch=120, loss=[0.34417825110256683, 0.28016263027985877, 0.2700249253710106, 0.2745998768011729, 0.263814642392099, 0.2676614523430662, 0.270

lr decreased to  1.299999999999999
lr decreased to  1.299999999999999
lr decreased to  1.299999999999999
lr decreased to  1.299999999999999
lr decreased to  1.299999999999999
lr decreased to  1.299999999999999
lr decreased to  1.299999999999999
lr decreased to  1.299999999999999
lr decreased to  1.299999999999999


Training:  65%|▋| 1404179/2160000 [47:44<2:51:54, 73.28it/s, epoch=130, loss=[0.34403491452336316, 0.279903267150124, 0.26972398966550876, 0.29585155646006284, 0.28113498715062957, 0.2925061341623467, 0.3

lr decreased to  1.1999999999999988
lr decreased to  1.1999999999999988
lr decreased to  1.1999999999999988
lr decreased to  1.1999999999999988
lr decreased to  1.1999999999999988
lr decreased to  1.1999999999999988
lr decreased to  1.1999999999999988
lr decreased to  1.1999999999999988
lr decreased to  1.1999999999999988


Training:  70%|▋| 1512199/2160000 [50:26<2:34:54, 69.70it/s, epoch=140, loss=[0.3439040901511906, 0.2796658083299799, 0.2694522288441663, 0.29160518825054166, 0.300951107616226, 0.2832409878820185, 0.3124

lr decreased to  1.0999999999999988
lr decreased to  1.0999999999999988
lr decreased to  1.0999999999999988
lr decreased to  1.0999999999999988
lr decreased to  1.0999999999999988
lr decreased to  1.0999999999999988
lr decreased to  1.0999999999999988
lr decreased to  1.0999999999999988
lr decreased to  1.0999999999999988


Training:  75%|▊| 1620199/2160000 [53:31<2:47:17, 53.78it/s, epoch=150, loss=[0.34378367612759214, 0.27942094701031844, 0.2692291189481815, 0.30545293517410715, 0.2937806315720078, 0.2831611066311599, 0.3

lr decreased to  0.9999999999999988
lr decreased to  0.9999999999999988
lr decreased to  0.9999999999999988
lr decreased to  0.9999999999999988
lr decreased to  0.9999999999999988
lr decreased to  0.9999999999999988
lr decreased to  0.9999999999999988
lr decreased to  0.9999999999999988
lr decreased to  0.9999999999999988


Training:  80%|▊| 1728243/2160000 [56:30<1:31:03, 79.03it/s, epoch=160, loss=[0.34367116180558965, 0.2792390722284716, 0.2689953825374444, 0.3054206345975399, 0.2945958401759462, 0.2853360270460447, 0.294

lr decreased to  0.8999999999999988
lr decreased to  0.8999999999999988
lr decreased to  0.8999999999999988
lr decreased to  0.8999999999999988
lr decreased to  0.8999999999999988
lr decreased to  0.8999999999999988
lr decreased to  0.8999999999999988
lr decreased to  0.8999999999999988
lr decreased to  0.8999999999999988


Training:  85%|▊| 1836256/2160000 [59:14<1:13:20, 73.58it/s, epoch=170, loss=[0.34356554706891357, 0.2790795525163413, 0.2688030932843689, 0.29915249504149, 0.2848946818709373, 0.27742951350907524, 0.2884

lr decreased to  0.7999999999999988
lr decreased to  0.7999999999999988
lr decreased to  0.7999999999999988
lr decreased to  0.7999999999999988
lr decreased to  0.7999999999999988
lr decreased to  0.7999999999999988
lr decreased to  0.7999999999999988
lr decreased to  0.7999999999999988
lr decreased to  0.7999999999999988


Training:  90%|▉| 1944216/2160000 [1:01:59<47:26, 75.82it/s, epoch=180, loss=[0.3434664457043015, 0.2789404719571271, 0.2686362821112078, 0.2913336465507745, 0.28165147361656023, 0.2744654158751172, 0.285

lr decreased to  0.6999999999999988
lr decreased to  0.6999999999999988
lr decreased to  0.6999999999999988
lr decreased to  0.6999999999999988
lr decreased to  0.6999999999999988
lr decreased to  0.6999999999999988
lr decreased to  0.6999999999999988
lr decreased to  0.6999999999999988
lr decreased to  0.6999999999999988


Training:  95%|▉| 2052199/2160000 [1:04:40<23:05, 77.81it/s, epoch=190, loss=[0.3433735039085158, 0.2788081298271813, 0.26848207838833293, 0.28615389404197505, 0.2794854845354953, 0.27304313043753303, 0.2

lr decreased to  0.5999999999999989
lr decreased to  0.5999999999999989
lr decreased to  0.5999999999999989
lr decreased to  0.5999999999999989
lr decreased to  0.5999999999999989
lr decreased to  0.5999999999999989
lr decreased to  0.5999999999999989
lr decreased to  0.5999999999999989
lr decreased to  0.5999999999999989


Training: 100%|█| 2160000/2160000 [1:07:23<00:00, 534.18it/s, epoch=200, loss=[0.34328535713255426, 0.27866577297449097, 0.2683759013563392, 0.2827893986801308, 0.2774406132102011, 0.27150080991288034, 0.


Initialization 1 / 9
Complete

Initialization 2 / 9
Complete

Initialization 3 / 9
Complete

Initialization 4 / 9
Complete

Initialization 5 / 9
Complete

Initialization 6 / 9
Complete

Initialization 7 / 9
Complete

Initialization 8 / 9
Complete

Initialization 9 / 9
Complete



Training:   5%| | 108270/2160000 [02:53<7:40:52, 74.20it/s, epoch=10, loss=[0.359643490215142, 0.2963589835663636, 0.294047114774585, 0.2922550838937357, 0.28903293716410755, 0.29022876019279154, 0.290704

lr decreased to  2.4
lr decreased to  2.4
lr decreased to  2.4
lr decreased to  2.4
lr decreased to  2.4
lr decreased to  2.4
lr decreased to  2.4
lr decreased to  2.4
lr decreased to  2.4


Training:  10%| | 216190/2160000 [05:36<7:37:17, 70.84it/s, epoch=20, loss=[0.35334477320313507, 0.2894840569049125, 0.2830223219593363, 0.2819360992809139, 0.27923103218277295, 0.2807476642976203, 0.2804

lr decreased to  2.3
lr decreased to  2.3
lr decreased to  2.3
lr decreased to  2.3
lr decreased to  2.3
lr decreased to  2.3
lr decreased to  2.3
lr decreased to  2.3
lr decreased to  2.3


Training:  15%|▏| 324203/2160000 [08:21<7:26:56, 68.46it/s, epoch=30, loss=[0.3506420801331602, 0.2857461208850149, 0.27935702944795326, 0.2770828187962371, 0.2753344259659451, 0.27647413099805446, 0.2770

lr decreased to  2.1999999999999997
lr decreased to  2.1999999999999997
lr decreased to  2.1999999999999997
lr decreased to  2.1999999999999997
lr decreased to  2.1999999999999997
lr decreased to  2.1999999999999997
lr decreased to  2.1999999999999997
lr decreased to  2.1999999999999997
lr decreased to  2.1999999999999997


Training:  20%|▏| 432208/2160000 [11:03<6:24:37, 74.87it/s, epoch=40, loss=[0.34938172926505406, 0.28366625539958473, 0.27713345870375644, 0.27462641427914325, 0.2726525054126974, 0.27499783888459184, 0.2

lr decreased to  2.0999999999999996
lr decreased to  2.0999999999999996
lr decreased to  2.0999999999999996
lr decreased to  2.0999999999999996
lr decreased to  2.0999999999999996
lr decreased to  2.0999999999999996
lr decreased to  2.0999999999999996
lr decreased to  2.0999999999999996
lr decreased to  2.0999999999999996


Training:  25%|▎| 540217/2160000 [13:44<5:50:28, 77.03it/s, epoch=50, loss=[0.34854359840353333, 0.2822925648341573, 0.2757125877340634, 0.27325967237353266, 0.2712025173505143, 0.27395017951726963, 0.273

lr decreased to  1.9999999999999996
lr decreased to  1.9999999999999996
lr decreased to  1.9999999999999996
lr decreased to  1.9999999999999996
lr decreased to  1.9999999999999996
lr decreased to  1.9999999999999996
lr decreased to  1.9999999999999996
lr decreased to  1.9999999999999996
lr decreased to  1.9999999999999996


Training:  30%|▎| 648218/2160000 [16:28<6:04:47, 69.07it/s, epoch=60, loss=[0.34803674608469076, 0.28128046296536935, 0.27463998469213674, 0.27212414470811674, 0.26949041391412415, 0.27299856926004096, 0.

lr decreased to  1.8999999999999995
lr decreased to  1.8999999999999995
lr decreased to  1.8999999999999995
lr decreased to  1.8999999999999995
lr decreased to  1.8999999999999995
lr decreased to  1.8999999999999995
lr decreased to  1.8999999999999995
lr decreased to  1.8999999999999995
lr decreased to  1.8999999999999995


Training:  35%|▎| 756172/2160000 [19:12<5:20:16, 73.05it/s, epoch=70, loss=[0.3476567186911899, 0.28049350328743466, 0.2738538556297622, 0.271080607672532, 0.2676519325127203, 0.27194126486778275, 0.27104

lr decreased to  1.7999999999999994
lr decreased to  1.7999999999999994
lr decreased to  1.7999999999999994
lr decreased to  1.7999999999999994
lr decreased to  1.7999999999999994
lr decreased to  1.7999999999999994
lr decreased to  1.7999999999999994
lr decreased to  1.7999999999999994
lr decreased to  1.7999999999999994


Training:  40%|▍| 864260/2160000 [21:56<4:52:22, 73.86it/s, epoch=80, loss=[0.34735549097259794, 0.2798450211683911, 0.27319038617114233, 0.27060159822305013, 0.26640902373939757, 0.271141389509042, 0.269

lr decreased to  1.6999999999999993
lr decreased to  1.6999999999999993
lr decreased to  1.6999999999999993
lr decreased to  1.6999999999999993
lr decreased to  1.6999999999999993
lr decreased to  1.6999999999999993
lr decreased to  1.6999999999999993
lr decreased to  1.6999999999999993
lr decreased to  1.6999999999999993


Training:  45%|▍| 972262/2160000 [24:40<4:34:58, 71.99it/s, epoch=90, loss=[0.3471114240338406, 0.27932213199635336, 0.2726394480466844, 0.2712502736349898, 0.2656282657881578, 0.2708229936907695, 0.26879

lr decreased to  1.5999999999999992
lr decreased to  1.5999999999999992
lr decreased to  1.5999999999999992
lr decreased to  1.5999999999999992
lr decreased to  1.5999999999999992
lr decreased to  1.5999999999999992
lr decreased to  1.5999999999999992
lr decreased to  1.5999999999999992
lr decreased to  1.5999999999999992


Training:  50%|▌| 1080134/2160000 [27:29<5:29:17, 54.66it/s, epoch=100, loss=[0.346893870433171, 0.2789141434182724, 0.2721832319845757, 0.274993400350213, 0.2653058323760831, 0.27163623469571263, 0.26818

lr decreased to  1.4999999999999991
lr decreased to  1.4999999999999991
lr decreased to  1.4999999999999991
lr decreased to  1.4999999999999991
lr decreased to  1.4999999999999991
lr decreased to  1.4999999999999991
lr decreased to  1.4999999999999991
lr decreased to  1.4999999999999991
lr decreased to  1.4999999999999991


Training:  55%|▌| 1188190/2160000 [30:38<5:02:14, 53.59it/s, epoch=110, loss=[0.3466491352766756, 0.27856193696459186, 0.2717853287855784, 0.30034703753888636, 0.2684265966961778, 0.2920034929116568, 0.27

lr decreased to  1.399999999999999
lr decreased to  1.399999999999999
lr decreased to  1.399999999999999
lr decreased to  1.399999999999999
lr decreased to  1.399999999999999
lr decreased to  1.399999999999999
lr decreased to  1.399999999999999
lr decreased to  1.399999999999999
lr decreased to  1.399999999999999


Training:  60%|▌| 1296158/2160000 [33:46<4:24:34, 54.42it/s, epoch=120, loss=[0.34645079530775486, 0.2785138939817744, 0.27148573557535804, 0.2901259971410039, 0.34441193126142017, 0.28501092500984604, 0.

lr decreased to  1.299999999999999
lr decreased to  1.299999999999999
lr decreased to  1.299999999999999
lr decreased to  1.299999999999999
lr decreased to  1.299999999999999
lr decreased to  1.299999999999999
lr decreased to  1.299999999999999
lr decreased to  1.299999999999999
lr decreased to  1.299999999999999


Training:  65%|▋| 1404208/2160000 [36:56<3:44:35, 56.09it/s, epoch=130, loss=[0.3462910477817061, 0.2781176036844649, 0.2711364139368137, 0.2952852532019216, 0.31336694429318135, 0.2810851870973903, 0.302

lr decreased to  1.1999999999999988
lr decreased to  1.1999999999999988
lr decreased to  1.1999999999999988
lr decreased to  1.1999999999999988
lr decreased to  1.1999999999999988
lr decreased to  1.1999999999999988
lr decreased to  1.1999999999999988
lr decreased to  1.1999999999999988
lr decreased to  1.1999999999999988


Training:  70%|▋| 1512188/2160000 [39:47<2:43:50, 65.90it/s, epoch=140, loss=[0.3461558544635771, 0.2778467730432748, 0.27085727788507935, 0.2855093932648499, 0.30247451486686844, 0.2787995064755282, 0.29

lr decreased to  1.0999999999999988
lr decreased to  1.0999999999999988
lr decreased to  1.0999999999999988
lr decreased to  1.0999999999999988
lr decreased to  1.0999999999999988
lr decreased to  1.0999999999999988
lr decreased to  1.0999999999999988
lr decreased to  1.0999999999999988
lr decreased to  1.0999999999999988


Training:  75%|▊| 1620277/2160000 [42:37<2:00:23, 74.72it/s, epoch=150, loss=[0.34603442298869314, 0.2776203224807975, 0.2706169552356002, 0.28191512070596214, 0.2966522756963969, 0.2774102833867071, 0.28

lr decreased to  0.9999999999999988
lr decreased to  0.9999999999999988
lr decreased to  0.9999999999999988
lr decreased to  0.9999999999999988
lr decreased to  0.9999999999999988
lr decreased to  0.9999999999999988
lr decreased to  0.9999999999999988
lr decreased to  0.9999999999999988
lr decreased to  0.9999999999999988


Training:  80%|▊| 1728267/2160000 [45:20<1:33:43, 76.77it/s, epoch=160, loss=[0.3459234660118815, 0.2774231149752935, 0.2704035596549512, 0.27980933934450114, 0.2923426671326162, 0.2762996616214515, 0.283

lr decreased to  0.8999999999999988
lr decreased to  0.8999999999999988
lr decreased to  0.8999999999999988
lr decreased to  0.8999999999999988
lr decreased to  0.8999999999999988
lr decreased to  0.8999999999999988
lr decreased to  0.8999999999999988
lr decreased to  0.8999999999999988
lr decreased to  0.8999999999999988


Training:  85%|▊| 1836165/2160000 [48:05<1:10:45, 76.27it/s, epoch=170, loss=[0.3458199962725244, 0.2772509484738113, 0.2702188798040148, 0.27708336072663475, 0.2889859033624331, 0.2751088916510344, 0.280

lr decreased to  0.7999999999999988
lr decreased to  0.7999999999999988
lr decreased to  0.7999999999999988
lr decreased to  0.7999999999999988
lr decreased to  0.7999999999999988
lr decreased to  0.7999999999999988
lr decreased to  0.7999999999999988
lr decreased to  0.7999999999999988
lr decreased to  0.7999999999999988


Training:  90%|▉| 1944193/2160000 [50:49<48:12, 74.60it/s, epoch=180, loss=[0.34569432104627335, 0.27715929274757684, 0.27008226402103896, 0.2754704561084514, 0.28558270615836, 0.27411267777284004, 0.2779

lr decreased to  0.6999999999999988
lr decreased to  0.6999999999999988
lr decreased to  0.6999999999999988
lr decreased to  0.6999999999999988
lr decreased to  0.6999999999999988
lr decreased to  0.6999999999999988
lr decreased to  0.6999999999999988
lr decreased to  0.6999999999999988
lr decreased to  0.6999999999999988


Training:  95%|▉| 2052163/2160000 [53:34<24:57, 72.03it/s, epoch=190, loss=[0.34553446315228903, 0.27697590413192896, 0.26990570473174263, 0.2750546931972112, 0.28370364638666284, 0.27346382220586135, 0.2

lr decreased to  0.5999999999999989
lr decreased to  0.5999999999999989
lr decreased to  0.5999999999999989
lr decreased to  0.5999999999999989
lr decreased to  0.5999999999999989
lr decreased to  0.5999999999999989
lr decreased to  0.5999999999999989
lr decreased to  0.5999999999999989
lr decreased to  0.5999999999999989


Training: 100%|█| 2160000/2160000 [56:17<00:00, 639.48it/s, epoch=200, loss=[0.34544427049656745, 0.2768397710223993, 0.26977131508290747, 0.2733123572419086, 0.28144431414703536, 0.2726252969106035, 0.27


Initialization 1 / 9
Complete

Initialization 2 / 9
Complete

Initialization 3 / 9
Complete

Initialization 4 / 9
Complete

Initialization 5 / 9
Complete

Initialization 6 / 9
Complete

Initialization 7 / 9
Complete

Initialization 8 / 9
Complete

Initialization 9 / 9
Complete



Training:   5%| | 108175/2160000 [02:53<7:54:33, 72.06it/s, epoch=10, loss=[0.3573980827877921, 0.3002213993420204, 0.2915120487163469, 0.28756282500922714, 0.2891799497852719, 0.28919616182645164, 0.2914

lr decreased to  2.4
lr decreased to  2.4
lr decreased to  2.4
lr decreased to  2.4
lr decreased to  2.4
lr decreased to  2.4
lr decreased to  2.4
lr decreased to  2.4
lr decreased to  2.4


Training:  10%| | 216166/2160000 [05:36<6:54:19, 78.19it/s, epoch=20, loss=[0.35189227469265444, 0.2920157304406172, 0.2830678908775252, 0.27833982768158116, 0.2790709729741015, 0.27772786249717063, 0.280

lr decreased to  2.3
lr decreased to  2.3
lr decreased to  2.3
lr decreased to  2.3
lr decreased to  2.3
lr decreased to  2.3
lr decreased to  2.3
lr decreased to  2.3
lr decreased to  2.3


Training:  15%|▏| 324138/2160000 [08:21<6:46:49, 75.21it/s, epoch=30, loss=[0.34964939656356997, 0.2876537227878973, 0.27909818492829813, 0.27455022739867296, 0.2751495365550121, 0.27425747282803065, 0.27

lr decreased to  2.1999999999999997
lr decreased to  2.1999999999999997
lr decreased to  2.1999999999999997
lr decreased to  2.1999999999999997
lr decreased to  2.1999999999999997
lr decreased to  2.1999999999999997
lr decreased to  2.1999999999999997
lr decreased to  2.1999999999999997
lr decreased to  2.1999999999999997


Training:  20%|▏| 432217/2160000 [11:07<6:31:32, 73.55it/s, epoch=40, loss=[0.34821435126165595, 0.2854258595655362, 0.2771873192985854, 0.2726565242807072, 0.2727499345938367, 0.27282772908608094, 0.2752

lr decreased to  2.0999999999999996
lr decreased to  2.0999999999999996
lr decreased to  2.0999999999999996
lr decreased to  2.0999999999999996
lr decreased to  2.0999999999999996
lr decreased to  2.0999999999999996
lr decreased to  2.0999999999999996
lr decreased to  2.0999999999999996
lr decreased to  2.0999999999999996


Training:  25%|▎| 540128/2160000 [13:49<8:13:52, 54.67it/s, epoch=50, loss=[0.34738235503435194, 0.284057072624564, 0.27584597912927444, 0.27127213815848034, 0.2705377215892079, 0.27168684805432924, 0.273

lr decreased to  1.9999999999999996
lr decreased to  1.9999999999999996
lr decreased to  1.9999999999999996
lr decreased to  1.9999999999999996
lr decreased to  1.9999999999999996
lr decreased to  1.9999999999999996
lr decreased to  1.9999999999999996
lr decreased to  1.9999999999999996
lr decreased to  1.9999999999999996


Training:  30%|▎| 648217/2160000 [16:34<5:46:15, 72.77it/s, epoch=60, loss=[0.34686596962312816, 0.2830393719673159, 0.27484771867593116, 0.2699762664983667, 0.2687334101895496, 0.27069755109647897, 0.272

lr decreased to  1.8999999999999995
lr decreased to  1.8999999999999995
lr decreased to  1.8999999999999995
lr decreased to  1.8999999999999995
lr decreased to  1.8999999999999995
lr decreased to  1.8999999999999995
lr decreased to  1.8999999999999995
lr decreased to  1.8999999999999995
lr decreased to  1.8999999999999995


Training:  35%|▎| 756184/2160000 [19:19<5:11:33, 75.10it/s, epoch=70, loss=[0.3464729765554269, 0.28227494339148196, 0.2740830154468612, 0.269175350169341, 0.26737766619771725, 0.26988197545210496, 0.2707

lr decreased to  1.7999999999999994
lr decreased to  1.7999999999999994
lr decreased to  1.7999999999999994
lr decreased to  1.7999999999999994
lr decreased to  1.7999999999999994
lr decreased to  1.7999999999999994
lr decreased to  1.7999999999999994
lr decreased to  1.7999999999999994
lr decreased to  1.7999999999999994


Training:  40%|▍| 864285/2160000 [22:02<4:51:23, 74.11it/s, epoch=80, loss=[0.34616317893067977, 0.2816570588201285, 0.2734653796255585, 0.2690954839438201, 0.26656388826668265, 0.2692324219395719, 0.2695

lr decreased to  1.6999999999999993
lr decreased to  1.6999999999999993
lr decreased to  1.6999999999999993
lr decreased to  1.6999999999999993
lr decreased to  1.6999999999999993
lr decreased to  1.6999999999999993
lr decreased to  1.6999999999999993
lr decreased to  1.6999999999999993
lr decreased to  1.6999999999999993


Training:  45%|▍| 972214/2160000 [24:44<4:34:37, 72.09it/s, epoch=90, loss=[0.3459100094189248, 0.2811714701602855, 0.2728859740992388, 0.26880368997653326, 0.2665414593617121, 0.26870903032521437, 0.2692

lr decreased to  1.5999999999999992
lr decreased to  1.5999999999999992
lr decreased to  1.5999999999999992
lr decreased to  1.5999999999999992
lr decreased to  1.5999999999999992
lr decreased to  1.5999999999999992
lr decreased to  1.5999999999999992
lr decreased to  1.5999999999999992
lr decreased to  1.5999999999999992


Training:  50%|▌| 1080104/2160000 [28:34<11:14:57, 26.67it/s, epoch=100, loss=[0.34569631795088435, 0.28076650142669685, 0.27244088175396125, 0.27197130300104577, 0.2654669378325339, 0.2682292447984219, 0

lr decreased to  1.4999999999999991
lr decreased to  1.4999999999999991
lr decreased to  1.4999999999999991
lr decreased to  1.4999999999999991
lr decreased to  1.4999999999999991
lr decreased to  1.4999999999999991
lr decreased to  1.4999999999999991
lr decreased to  1.4999999999999991
lr decreased to  1.4999999999999991


Training:  55%|▌| 1188126/2160000 [32:37<6:46:14, 39.87it/s, epoch=110, loss=[0.3455105085919302, 0.28042567133903545, 0.27203576028347015, 0.29808785013854516, 0.27025957355896646, 0.26873613737523594, 0

lr decreased to  1.399999999999999
lr decreased to  1.399999999999999
lr decreased to  1.399999999999999
lr decreased to  1.399999999999999
lr decreased to  1.399999999999999
lr decreased to  1.399999999999999
lr decreased to  1.399999999999999
lr decreased to  1.399999999999999
lr decreased to  1.399999999999999


Training:  60%|▌| 1296082/2160000 [36:39<9:23:28, 25.55it/s, epoch=120, loss=[0.3453460327039159, 0.2801620430747668, 0.2716857560724022, 0.28973880926768014, 0.35193085667987645, 0.2703289223213987, 0.30

lr decreased to  1.299999999999999
lr decreased to  1.299999999999999
lr decreased to  1.299999999999999
lr decreased to  1.299999999999999
lr decreased to  1.299999999999999
lr decreased to  1.299999999999999
lr decreased to  1.299999999999999
lr decreased to  1.299999999999999
lr decreased to  1.299999999999999


Training:  65%|▋| 1404136/2160000 [40:41<5:29:01, 38.29it/s, epoch=130, loss=[0.3451934915781014, 0.2799376157671217, 0.2713932448873913, 0.29210970560709665, 0.31994950433572156, 0.2692407651990653, 0.29

lr decreased to  1.1999999999999988
lr decreased to  1.1999999999999988
lr decreased to  1.1999999999999988
lr decreased to  1.1999999999999988
lr decreased to  1.1999999999999988
lr decreased to  1.1999999999999988
lr decreased to  1.1999999999999988
lr decreased to  1.1999999999999988
lr decreased to  1.1999999999999988


Training:  70%|▋| 1512109/2160000 [44:39<4:51:02, 37.10it/s, epoch=140, loss=[0.34505823398629826, 0.2797079244256017, 0.2711231102794407, 0.2850900686532258, 0.30962242615719654, 0.2682336375862359, 0.28

lr decreased to  1.0999999999999988
lr decreased to  1.0999999999999988
lr decreased to  1.0999999999999988
lr decreased to  1.0999999999999988
lr decreased to  1.0999999999999988
lr decreased to  1.0999999999999988
lr decreased to  1.0999999999999988
lr decreased to  1.0999999999999988
lr decreased to  1.0999999999999988


Training:  75%|▊| 1620104/2160000 [48:37<4:11:49, 35.73it/s, epoch=150, loss=[0.34493462766210264, 0.2795149947951234, 0.27088103376328915, 0.28198782980442066, 0.3032135645300153, 0.2859995453059675, 0.2

lr decreased to  0.9999999999999988
lr decreased to  0.9999999999999988
lr decreased to  0.9999999999999988
lr decreased to  0.9999999999999988
lr decreased to  0.9999999999999988
lr decreased to  0.9999999999999988
lr decreased to  0.9999999999999988
lr decreased to  0.9999999999999988
lr decreased to  0.9999999999999988


Training:  80%|▊| 1728167/2160000 [52:34<3:15:48, 36.76it/s, epoch=160, loss=[0.344820132727424, 0.27934926837682744, 0.27066664954026504, 0.27911984528104467, 0.29812946237623666, 0.2790555781871075, 0.2

lr decreased to  0.8999999999999988
lr decreased to  0.8999999999999988
lr decreased to  0.8999999999999988
lr decreased to  0.8999999999999988
lr decreased to  0.8999999999999988
lr decreased to  0.8999999999999988
lr decreased to  0.8999999999999988
lr decreased to  0.8999999999999988
lr decreased to  0.8999999999999988


Training:  85%|▊| 1836200/2160000 [56:35<2:22:55, 37.76it/s, epoch=170, loss=[0.34470646604895594, 0.27920931696891765, 0.2704657636831203, 0.277117525860667, 0.29338807319601323, 0.27556811844309176, 0.2

lr decreased to  0.7999999999999988
lr decreased to  0.7999999999999988
lr decreased to  0.7999999999999988
lr decreased to  0.7999999999999988
lr decreased to  0.7999999999999988
lr decreased to  0.7999999999999988
lr decreased to  0.7999999999999988
lr decreased to  0.7999999999999988
lr decreased to  0.7999999999999988


Training:  90%|▉| 1944174/2160000 [1:00:34<1:45:19, 34.15it/s, epoch=180, loss=[0.34460032323996226, 0.27905716190735463, 0.27029381312429873, 0.2755094480762882, 0.290796579346061, 0.274277397120992, 0.2

lr decreased to  0.6999999999999988
lr decreased to  0.6999999999999988
lr decreased to  0.6999999999999988
lr decreased to  0.6999999999999988
lr decreased to  0.6999999999999988
lr decreased to  0.6999999999999988
lr decreased to  0.6999999999999988
lr decreased to  0.6999999999999988
lr decreased to  0.6999999999999988


Training:  95%|▉| 2052084/2160000 [1:04:36<58:05, 30.96it/s, epoch=190, loss=[0.34450416773557657, 0.278929830764731, 0.2701466266562541, 0.274439174855749, 0.28765737960736, 0.27312159642577194, 0.276225

lr decreased to  0.5999999999999989
lr decreased to  0.5999999999999989
lr decreased to  0.5999999999999989
lr decreased to  0.5999999999999989
lr decreased to  0.5999999999999989
lr decreased to  0.5999999999999989
lr decreased to  0.5999999999999989
lr decreased to  0.5999999999999989
lr decreased to  0.5999999999999989


Training: 100%|█| 2160000/2160000 [1:08:38<00:00, 524.46it/s, epoch=200, loss=[0.3444142593443392, 0.2788142452140647, 0.2700207697351773, 0.27309889547526844, 0.2852709848930439, 0.27230499078830067, 0.2


Initialization 1 / 9
Complete

Initialization 2 / 9
Complete

Initialization 3 / 9
Complete

Initialization 4 / 9
Complete

Initialization 5 / 9
Complete

Initialization 6 / 9
Complete

Initialization 7 / 9
Complete

Initialization 8 / 9
Complete

Initialization 9 / 9
Complete



Training:   5%| | 108121/2160000 [04:18<21:06:16, 27.01it/s, epoch=10, loss=[0.35831941331426354, 0.3019789513945584, 0.29453714658816604, 0.2887136005858577, 0.2885637841373686, 0.2881600274145606, 0.284

lr decreased to  2.4
lr decreased to  2.4
lr decreased to  2.4
lr decreased to  2.4
lr decreased to  2.4
lr decreased to  2.4
lr decreased to  2.4
lr decreased to  2.4
lr decreased to  2.4


Training:  10%| | 216112/2160000 [08:18<16:47:12, 32.17it/s, epoch=20, loss=[0.3515731731553867, 0.29368366462488993, 0.2841833292941255, 0.2801436815410852, 0.2784273037811122, 0.2786143997311588, 0.2760

lr decreased to  2.3
lr decreased to  2.3
lr decreased to  2.3
lr decreased to  2.3
lr decreased to  2.3
lr decreased to  2.3
lr decreased to  2.3
lr decreased to  2.3
lr decreased to  2.3


Training:  15%|▏| 324099/2160000 [12:19<15:48:26, 32.26it/s, epoch=30, loss=[0.3494919535766044, 0.2900776969393089, 0.2805481721212468, 0.2758874025195838, 0.27462360958258314, 0.2743339676161607, 0.2727

lr decreased to  2.1999999999999997
lr decreased to  2.1999999999999997
lr decreased to  2.1999999999999997
lr decreased to  2.1999999999999997
lr decreased to  2.1999999999999997
lr decreased to  2.1999999999999997
lr decreased to  2.1999999999999997
lr decreased to  2.1999999999999997
lr decreased to  2.1999999999999997


Training:  20%|▏| 432116/2160000 [16:19<13:35:26, 35.32it/s, epoch=40, loss=[0.3478010096649328, 0.2881652389715115, 0.2785027808199326, 0.2736796686549977, 0.2720026396711668, 0.273149773006638, 0.270503

lr decreased to  2.0999999999999996
lr decreased to  2.0999999999999996
lr decreased to  2.0999999999999996
lr decreased to  2.0999999999999996
lr decreased to  2.0999999999999996
lr decreased to  2.0999999999999996
lr decreased to  2.0999999999999996
lr decreased to  2.0999999999999996
lr decreased to  2.0999999999999996


Training:  25%|▎| 540106/2160000 [20:19<12:54:13, 34.87it/s, epoch=50, loss=[0.34691838356355836, 0.28663220080236596, 0.27709294560054887, 0.27230961625774736, 0.2700743502875168, 0.2721134637792902, 0.2

lr decreased to  1.9999999999999996
lr decreased to  1.9999999999999996
lr decreased to  1.9999999999999996
lr decreased to  1.9999999999999996
lr decreased to  1.9999999999999996
lr decreased to  1.9999999999999996
lr decreased to  1.9999999999999996
lr decreased to  1.9999999999999996
lr decreased to  1.9999999999999996


Training:  30%|▎| 648119/2160000 [24:21<12:06:57, 34.66it/s, epoch=60, loss=[0.34640729270875453, 0.2856238853931423, 0.2760340812057258, 0.27119847012062875, 0.2680675678451859, 0.2712205250064531, 0.267

lr decreased to  1.8999999999999995
lr decreased to  1.8999999999999995
lr decreased to  1.8999999999999995
lr decreased to  1.8999999999999995
lr decreased to  1.8999999999999995
lr decreased to  1.8999999999999995
lr decreased to  1.8999999999999995
lr decreased to  1.8999999999999995
lr decreased to  1.8999999999999995


Training:  35%|▎| 756086/2160000 [28:20<11:03:30, 35.26it/s, epoch=70, loss=[0.3460242665062342, 0.284861550802986, 0.2752512252827488, 0.2701323104153077, 0.266618560527762, 0.2703815658390523, 0.2662684

lr decreased to  1.7999999999999994
lr decreased to  1.7999999999999994
lr decreased to  1.7999999999999994
lr decreased to  1.7999999999999994
lr decreased to  1.7999999999999994
lr decreased to  1.7999999999999994
lr decreased to  1.7999999999999994
lr decreased to  1.7999999999999994
lr decreased to  1.7999999999999994


Training:  40%|▍| 864135/2160000 [32:18<9:53:42, 36.38it/s, epoch=80, loss=[0.3456810077776511, 0.28429348640143814, 0.27465142935514475, 0.2692675890773536, 0.2674123697231211, 0.26962694627543304, 0.265

lr decreased to  1.6999999999999993
lr decreased to  1.6999999999999993
lr decreased to  1.6999999999999993
lr decreased to  1.6999999999999993
lr decreased to  1.6999999999999993
lr decreased to  1.6999999999999993
lr decreased to  1.6999999999999993
lr decreased to  1.6999999999999993
lr decreased to  1.6999999999999993


Training:  45%|▍| 972097/2160000 [36:19<10:27:10, 31.57it/s, epoch=90, loss=[0.34541431797047445, 0.2837977436184881, 0.2741258011013268, 0.2686926760276162, 0.26555599082261316, 0.2688776280730964, 0.264

lr decreased to  1.5999999999999992
lr decreased to  1.5999999999999992
lr decreased to  1.5999999999999992
lr decreased to  1.5999999999999992
lr decreased to  1.5999999999999992
lr decreased to  1.5999999999999992
lr decreased to  1.5999999999999992
lr decreased to  1.5999999999999992
lr decreased to  1.5999999999999992


Training:  50%|▌| 1080105/2160000 [40:20<8:12:42, 36.53it/s, epoch=100, loss=[0.34516016624867896, 0.2834037199368082, 0.2736249105632308, 0.26957677694658405, 0.26480436122665835, 0.2680850581576428, 0.2

lr decreased to  1.4999999999999991
lr decreased to  1.4999999999999991
lr decreased to  1.4999999999999991
lr decreased to  1.4999999999999991
lr decreased to  1.4999999999999991
lr decreased to  1.4999999999999991
lr decreased to  1.4999999999999991
lr decreased to  1.4999999999999991
lr decreased to  1.4999999999999991


Training:  55%|▌| 1188133/2160000 [44:18<6:57:11, 38.83it/s, epoch=110, loss=[0.3449656333277621, 0.2830603939543163, 0.2732468758523468, 0.2704672821859519, 0.26397273312012354, 0.2680041046688956, 0.264

lr decreased to  1.399999999999999
lr decreased to  1.399999999999999
lr decreased to  1.399999999999999
lr decreased to  1.399999999999999
lr decreased to  1.399999999999999
lr decreased to  1.399999999999999
lr decreased to  1.399999999999999
lr decreased to  1.399999999999999
lr decreased to  1.399999999999999


Training:  60%|▌| 1296133/2160000 [48:26<8:30:12, 28.22it/s, epoch=120, loss=[0.34472671069204824, 0.2827710089584194, 0.27291150401035974, 0.3004551280289888, 0.2817067190756402, 0.32213248968124397, 0.2

lr decreased to  1.299999999999999
lr decreased to  1.299999999999999
lr decreased to  1.299999999999999
lr decreased to  1.299999999999999
lr decreased to  1.299999999999999
lr decreased to  1.299999999999999
lr decreased to  1.299999999999999
lr decreased to  1.299999999999999
lr decreased to  1.299999999999999


Training:  65%|▋| 1404064/2160000 [52:44<8:47:54, 23.87it/s, epoch=130, loss=[0.344574675311645, 0.282490181326866, 0.27260174043476576, 0.2966396140058841, 0.32965190117557824, 0.29131667604048994, 0.267

lr decreased to  1.1999999999999988
lr decreased to  1.1999999999999988
lr decreased to  1.1999999999999988
lr decreased to  1.1999999999999988
lr decreased to  1.1999999999999988
lr decreased to  1.1999999999999988
lr decreased to  1.1999999999999988
lr decreased to  1.1999999999999988
lr decreased to  1.1999999999999988


Training:  70%|▋| 1512115/2160000 [57:14<5:20:12, 33.72it/s, epoch=140, loss=[0.34443961729606054, 0.2822559718290966, 0.2723386544237534, 0.2876623100787398, 0.31398556816081147, 0.2830630425363778, 0.26

lr decreased to  1.0999999999999988
lr decreased to  1.0999999999999988
lr decreased to  1.0999999999999988
lr decreased to  1.0999999999999988
lr decreased to  1.0999999999999988
lr decreased to  1.0999999999999988
lr decreased to  1.0999999999999988
lr decreased to  1.0999999999999988
lr decreased to  1.0999999999999988


Training:  75%|▊| 1620112/2160000 [1:01:44<5:40:25, 26.43it/s, epoch=150, loss=[0.3443147764851652, 0.2820517709106205, 0.2720971550047395, 0.282917470385631, 0.3041213966161012, 0.2795091025531293, 0.270

lr decreased to  0.9999999999999988
lr decreased to  0.9999999999999988
lr decreased to  0.9999999999999988
lr decreased to  0.9999999999999988
lr decreased to  0.9999999999999988
lr decreased to  0.9999999999999988
lr decreased to  0.9999999999999988
lr decreased to  0.9999999999999988
lr decreased to  0.9999999999999988


Training:  80%|▊| 1728091/2160000 [1:06:14<6:12:24, 19.33it/s, epoch=160, loss=[0.3441993955771129, 0.28186027812461095, 0.27188499823212603, 0.2794132092346747, 0.29935687586665155, 0.277162696868181, 0.

lr decreased to  0.8999999999999988
lr decreased to  0.8999999999999988
lr decreased to  0.8999999999999988
lr decreased to  0.8999999999999988
lr decreased to  0.8999999999999988
lr decreased to  0.8999999999999988
lr decreased to  0.8999999999999988
lr decreased to  0.8999999999999988
lr decreased to  0.8999999999999988


Training:  85%|▊| 1836069/2160000 [1:10:47<2:57:17, 30.45it/s, epoch=170, loss=[0.344092305973172, 0.2816824591904879, 0.27170025366047995, 0.27785584849615896, 0.29439177612463646, 0.27570862847069894, 0

lr decreased to  0.7999999999999988
lr decreased to  0.7999999999999988
lr decreased to  0.7999999999999988
lr decreased to  0.7999999999999988
lr decreased to  0.7999999999999988
lr decreased to  0.7999999999999988
lr decreased to  0.7999999999999988
lr decreased to  0.7999999999999988
lr decreased to  0.7999999999999988


Training:  90%|▉| 1944208/2160000 [1:15:04<1:28:58, 40.42it/s, epoch=180, loss=[0.34399080408116195, 0.28152878810961995, 0.27152891064683626, 0.27592521178225676, 0.2909200218568246, 0.2739134452988707, 

lr decreased to  0.6999999999999988
lr decreased to  0.6999999999999988
lr decreased to  0.6999999999999988
lr decreased to  0.6999999999999988
lr decreased to  0.6999999999999988
lr decreased to  0.6999999999999988
lr decreased to  0.6999999999999988
lr decreased to  0.6999999999999988
lr decreased to  0.6999999999999988


Training:  95%|▉| 2052106/2160000 [1:19:17<52:56, 33.97it/s, epoch=190, loss=[0.34389630009730643, 0.2813922677685818, 0.27138084525863365, 0.27523945840696507, 0.2883856549610701, 0.2727192341784632, 0.2

lr decreased to  0.5999999999999989
lr decreased to  0.5999999999999989
lr decreased to  0.5999999999999989
lr decreased to  0.5999999999999989
lr decreased to  0.5999999999999989
lr decreased to  0.5999999999999989
lr decreased to  0.5999999999999989
lr decreased to  0.5999999999999989
lr decreased to  0.5999999999999989


Training: 100%|█| 2160000/2160000 [1:23:25<00:00, 431.53it/s, epoch=200, loss=[0.3438074507812659, 0.28127340219914887, 0.27125316590070736, 0.27351496992011864, 0.2860166090975205, 0.27202082378168907, 0


In [11]:
# Show the complete test_acc record, then on the next line the single largest
# value np.max finds anywhere inside it.
print(test_acc, np.max(test_acc), sep="\n")

[[[array([0.0769, 0.0512, 0.1242, 0.0869, 0.0895, 0.0983, 0.089 , 0.1198,
       0.1524])], [array([0.8889, 0.9054, 0.9089, 0.9111, 0.911 , 0.9112, 0.9114, 0.9112,
       0.911 ])], [array([0.9   , 0.9163, 0.9186, 0.9193, 0.92  , 0.9204, 0.9204, 0.9203,
       0.9203])], [array([0.9044, 0.9223, 0.9246, 0.9242, 0.9259, 0.9264, 0.9266, 0.9267,
       0.927 ])], [array([0.907 , 0.9258, 0.9283, 0.9291, 0.9297, 0.9301, 0.9305, 0.9308,
       0.9308])], [array([0.9081, 0.9287, 0.9311, 0.9323, 0.9327, 0.9326, 0.9328, 0.9331,
       0.933 ])], [array([0.909 , 0.9305, 0.9332, 0.9339, 0.9349, 0.9349, 0.935 , 0.9349,
       0.9351])], [array([0.9096, 0.9323, 0.935 , 0.9367, 0.937 , 0.9371, 0.9367, 0.937 ,
       0.9365])], [array([0.9096, 0.9335, 0.9372, 0.9384, 0.9381, 0.9382, 0.9387, 0.9389,
       0.939 ])], [array([0.9102, 0.935 , 0.9383, 0.9393, 0.939 , 0.9391, 0.9397, 0.9397,
       0.9398])], [array([0.9107, 0.9366, 0.9396, 0.9399, 0.9396, 0.9404, 0.9408, 0.9411,
       0.9407])], [array([

In [12]:
# Show the complete train_acc record, then on the next line the single largest
# value np.max finds anywhere inside it.
print(train_acc, np.max(train_acc), sep="\n")

[[[array([0.07681667, 0.05031667, 0.12      , 0.08858333, 0.10001667,
       0.10185   , 0.08891667, 0.12171667, 0.14408333])], [array([0.88228333, 0.8996    , 0.90306667, 0.9034    , 0.90338333,
       0.904     , 0.90436667, 0.90438333, 0.90423333])], [array([0.89298333, 0.91161667, 0.91438333, 0.91495   , 0.91513333,
       0.91531667, 0.91551667, 0.91528333, 0.91526667])], [array([0.89733333, 0.91783333, 0.92068333, 0.92111667, 0.92151667,
       0.92173333, 0.92185   , 0.92175   , 0.92185   ])], [array([0.90028333, 0.92241667, 0.92501667, 0.92558333, 0.9256    ,
       0.9259    , 0.92606667, 0.92621667, 0.92645   ])], [array([0.90245   , 0.92551667, 0.92826667, 0.92916667, 0.92941667,
       0.92965   , 0.92971667, 0.92978333, 0.9297    ])], [array([0.90395   , 0.92856667, 0.93103333, 0.93151667, 0.9319    ,
       0.93215   , 0.93213333, 0.93206667, 0.93223333])], [array([0.90466667, 0.93016667, 0.93298333, 0.93356667, 0.93388333,
       0.93411667, 0.93423333, 0.93423333, 0.934

In [13]:
# Persist the objects held in train_acc, test_acc, w_layers and loss as .npy
# files under ./../new_data.
import os  # local to this cell; only needed for the directory guard below

# np.save does not create missing parent directories, so make sure the output
# folder exists first; without it the first save raises FileNotFoundError and
# none of the results are written.
save_dir = "./../new_data"
os.makedirs(save_dir, exist_ok=True)

# Write the files one at a time; each object is converted with np.array
# immediately before it is saved.
for npy_name, run_record in (
    ("mnist_train_acc_multilayer.npy", train_acc),
    ("mnist_test_acc_multilayer.npy", test_acc),
    ("w_mnist_bestconfig_multilayer.npy", w_layers),
    ("loss_mnist_bestconfig_output_multilayer.npy", loss),
):
    np.save(f"{save_dir}/{npy_name}", np.array(run_record))