In [1]:
import torch
from tqdm import trange

import utils as ut
import evaluate as eva

In [2]:
class SVM():
    def __init__(self, kwargs):
        self.model_name = 'SVM'
        self.W = torch.randn(kwargs['feature_dim']+1, kwargs['class_num'])
        self.N = kwargs['class_num']
        self.init_weight()
        
    def init_weight(self):
        torch.nn.init.xavier_normal_(self.W)
        
    def train(self, X, Y, alpha=0.01, reg=2.5e4, vec=True):
        """
        功能: 完成训练过程，包括(1)求解损失, 计算梯度. (2) 正则化，计算梯度，(3)更新参数 
        输入:
            X(Tensor):(N, K:3*32*32+1)
            Y(Tensor):(N)
            alpha(float):                   # 学习率
            reg(float):                     # 正则化系数
        输出:
            L(int):(1)                      # 损失，范围给主程序以打印显示        
        """
        
        # 计算梯度与正则化
        if vec:
            L, dW = self.cal_dw_with_vec(X, Y, reg)
        else:
            L, dW = self.cal_dw_with_loop(X, Y, reg)
            
        # 更新参数
        self.W -= alpha * dW
        return L
    
    def cal_dw_with_loop(self, X, Y, reg):
        """
        功能： 计算损失和梯度
        输入:
            X(Tensor):(N, K:3*32*32+1)
            Y(Tensor):(N)
            reg(float):                    # 正则化系数
        输出:
            L(int): 1                      # 损失               
            dW(Tensor):(K+1,C)             # 参数梯度       
        """
        L = 0.0
        N = X.size(0)
        F, C = self.W.size()
        dW = torch.zeros(F, C)
        
        # (1) 求解损失
        for idx, Xi in enumerate(X):
            yi = Y[idx]
            scores = Xi.matmul(self.W)
            syi = scores[yi]
            for j in range(self.N):
                if j == yi:
                    continue
                sj = scores[j]
                if syi - sj - 1 < 0:
                    L += (sj - syi + 1).item()
                    dW[:,j] += Xi.t()
                    dW[:,yi] -= Xi.t()
        
        # (2) 正则化
        L = L / N +  0.5*reg*torch.sum(torch.pow(self.W, 2)).item()
        dW = dW / N +  reg*self.W
        
        return L, dW
    
    def cal_dw_with_vec(self, X, Y, reg):
        """
        功能： 计算损失和梯度
        输入:
            X(Tensor):(N, K:3*32*32+1)
            Y(Tensor):(N)
            reg(float):                    # 正则化系数
        输出:
            L(int): 1                      # 损失               
            dW(Tensor):(K+1,C)             # 参数梯度       
        """
        
        N = X.size(0)
        F, C = self.W.size()
        
        score = X.matmul(self.W)                                       # (N, C)
        correct = score[range(N), Y.tolist()].unsqueeze(1)             # (N, 1)
        score = torch.relu(score-correct+1)                            # (N, C)
        score[range(N), Y.tolist()] = 0
        
        L = torch.sum(score).item()
        L = L / N +  0.5*reg*torch.sum(torch.pow(self.W, 2)).item()
        
        
        dW = torch.zeros(F, C)
        mask = torch.zeros(N, C)
        mask[score>0] = 1                                              # (N,C)
        mask[range(N), Y.tolist()] = -torch.sum(mask, 1)               # (N,C)
        dW = X.t().matmul(mask)                                        # (F,C)

        dW = dW / N + reg*self.W
        return L, dW
        
    def predict(self, X):
        """
        功能: 预测输入数据标签
        输入:
            X(Tensor): (N, 3*32*32)
        输出:
            labels(Tensor): (N)
        """
        S = X.matmul(self.W)  # (N, C)
        return torch.max(S, 1)[1]
        
        

In [3]:
train_set, test_set = ut.data_load('./data')

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [4]:
batch_num = 200
opt = {"feature_dim":3*32*32, "class_num":10}
train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_num, shuffle=True, num_workers=4)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_num, shuffle=False, num_workers=4)

In [None]:
def train(alpha, reg, epoches):
    """
    功能：完成训练过程
    输入:
        alpha(int):(1)     # 学习率
        reg(int):(1)       # 正则化系数
        epoches(int):(1)   # 迭代次数
    输出:
        svmEr(class) 训练好的模型
        alpha
        reg
    """
    svmEr = SVM(opt)
    for epoch in range(epoches):
        train_data_interator = enumerate(train_loader)
        train_steps = test_steps = (len(train_set) + batch_num - 1) // batch_num

        t = trange(train_steps)
        loss_avg = ut.RunningAverage()
        print("epoch:{}".format(epoch))
        for i in t:
            idx, data = next(train_data_interator)
            X_batch, Y_batch = data
            X_batch = X_batch.view(X_batch.size(0), -1)
            X_batch = torch.cat((torch.ones(X_batch.size(0),1), X_batch), 1)   

            loss = svmEr.train(X_batch, Y_batch, alpha=1e-4, reg=1, vec=True)
            loss_avg.update(loss)
            t.set_postfix(loss='{:05.3f}/{:05.3f}'.format(loss_avg(), loss))
        print(loss_avg())
    return svmEr

def evaluate(svmEr):
    """
    功能：使用训练好的模型进行预测，并评测结果
    输入: 
        svmEr(class) 训练好的模型
    输出: 
        acc(int):(1) 模型准确率
    """
    test_data_interator = enumerate(test_loader)
    test_steps = test_steps = (len(test_set) + batch_num - 1) // batch_num

    t = trange(test_steps)
    Y_predict = []
    Y_true = []
    for i in t:
        idx, data = next(test_data_interator)
        X_batch, Y_batch = data
        Y_true.extend(Y_batch.tolist())
        X_batch = X_batch.view(X_batch.size(0), -1)
        X_batch = torch.cat((torch.ones(X_batch.size(0),1), X_batch), 1)   

        y = svmEr.predict(X_batch)
        Y_predict.extend(y.tolist())
        
    Y_predict = torch.LongTensor(Y_predict)
    Y_true = torch.LongTensor(Y_true)
    acc = torch.sum(Y_predict == Y_true).item() /len(Y_predict)
    
    return acc
    

In [None]:
lrs = [1e-2, 1e-3, 1e-4, 1e-5]
reg_strs = [0, 1, 10, 100, 1000]

result = {}

best_lr = None
best_reg = None
best_svm = None
best_acc = -1

for lr in lrs:
    for reg in reg_strs:
        svmEr = train(lr, reg, 25)
        acc = evaluate(svmEr)
        print("lr:{}; reg:{}; acc:{}".format(lr, reg, acc))
        if acc > best_acc:
            best_lr = lr
            best_reg = reg
            best_svm = svmEr
        result[(lr, reg)] = acc
print("the best: lr:{}; reg:{}; acc:{}".format(best_lr, best_reg, best_acc))

  2%|▏         | 6/250 [00:00<00:04, 59.65it/s, loss=19.626/19.076]

epoch:0


100%|██████████| 250/250 [00:01<00:00, 128.34it/s, loss=17.916/16.739]

17.916246122131355



  4%|▎         | 9/250 [00:00<00:02, 87.78it/s, loss=16.962/16.527]

epoch:1


100%|██████████| 250/250 [00:02<00:00, 121.80it/s, loss=16.359/15.812]

16.35899254180909



  4%|▍         | 11/250 [00:00<00:02, 107.86it/s, loss=15.887/15.926]

epoch:2


100%|██████████| 250/250 [00:02<00:00, 122.62it/s, loss=15.422/14.556]

15.422245274658213



  4%|▍         | 11/250 [00:00<00:02, 107.69it/s, loss=15.068/14.713]

epoch:3


100%|██████████| 250/250 [00:01<00:00, 126.77it/s, loss=14.724/14.656]


14.72368852920533


  4%|▎         | 9/250 [00:00<00:02, 85.51it/s, loss=14.483/14.247]

epoch:4


100%|██████████| 250/250 [00:02<00:00, 124.18it/s, loss=14.140/13.464]

14.140059371109006



  4%|▍         | 11/250 [00:00<00:02, 104.50it/s, loss=13.871/14.231]

epoch:5


100%|██████████| 250/250 [00:01<00:00, 126.94it/s, loss=13.625/12.891]

13.62452166725158



  4%|▍         | 11/250 [00:00<00:02, 108.54it/s, loss=13.250/13.248]

epoch:6


100%|██████████| 250/250 [00:02<00:00, 121.78it/s, loss=13.159/13.136]


13.159336671218869


  4%|▎         | 9/250 [00:00<00:02, 87.76it/s, loss=12.820/12.731]

epoch:7


100%|██████████| 250/250 [00:02<00:00, 118.04it/s, loss=12.731/12.259]


12.731060939178466


  4%|▍         | 10/250 [00:00<00:02, 99.92it/s, loss=12.546/12.278]

epoch:8


100%|██████████| 250/250 [00:01<00:00, 126.49it/s, loss=12.335/12.217]


12.335066028442393


  4%|▎         | 9/250 [00:00<00:02, 84.67it/s, loss=12.044/12.404]

epoch:9


100%|██████████| 250/250 [00:02<00:00, 124.61it/s, loss=11.966/11.266]

11.965536507186881



  4%|▍         | 10/250 [00:00<00:02, 98.35it/s, loss=11.786/12.106]

epoch:10


100%|██████████| 250/250 [00:02<00:00, 119.00it/s, loss=11.621/11.060]

11.62132484992981



  4%|▍         | 10/250 [00:00<00:02, 98.08it/s, loss=11.517/11.710]

epoch:11


100%|██████████| 250/250 [00:02<00:00, 124.29it/s, loss=11.296/11.574]

11.296423723754872



  4%|▍         | 10/250 [00:00<00:02, 96.99it/s, loss=11.003/11.242]

epoch:12


100%|██████████| 250/250 [00:02<00:00, 122.79it/s, loss=10.993/11.024]


10.992526661071778


  4%|▎         | 9/250 [00:00<00:02, 83.33it/s, loss=10.799/10.465]

epoch:13


100%|██████████| 250/250 [00:01<00:00, 126.51it/s, loss=10.705/11.083]

10.705010594329833



  4%|▎         | 9/250 [00:00<00:02, 83.94it/s, loss=10.579/10.664]

epoch:14


100%|██████████| 250/250 [00:01<00:00, 125.87it/s, loss=10.436/9.917]

10.435513696746824



  4%|▎         | 9/250 [00:00<00:02, 89.82it/s, loss=10.275/10.178]

epoch:15


100%|██████████| 250/250 [00:02<00:00, 125.10it/s, loss=10.178/10.164]

10.17831240562439



  4%|▍         | 11/250 [00:00<00:02, 106.64it/s, loss=10.062/10.335]

epoch:16


100%|██████████| 250/250 [00:02<00:00, 123.30it/s, loss=9.938/9.928]

9.938086416778564



  4%|▎         | 9/250 [00:00<00:02, 88.91it/s, loss=9.653/9.865]

epoch:17


100%|██████████| 250/250 [00:02<00:00, 120.55it/s, loss=9.710/9.548]

9.709956866836546



  4%|▎         | 9/250 [00:00<00:02, 89.13it/s, loss=9.561/10.212]

epoch:18


100%|██████████| 250/250 [00:02<00:00, 123.01it/s, loss=9.493/9.535]


9.493302478179936


  4%|▍         | 10/250 [00:00<00:02, 98.41it/s, loss=9.395/9.489] 

epoch:19


100%|██████████| 250/250 [00:02<00:00, 123.30it/s, loss=9.288/9.249]

9.288450702667237



  4%|▍         | 10/250 [00:00<00:02, 86.46it/s, loss=9.192/8.905]

epoch:20


100%|██████████| 250/250 [00:02<00:00, 123.49it/s, loss=9.095/9.419]


9.094563897895812


  4%|▍         | 11/250 [00:00<00:02, 106.33it/s, loss=9.048/8.729]

epoch:21


100%|██████████| 250/250 [00:02<00:00, 121.40it/s, loss=8.911/9.272]

8.910736973114023



  4%|▍         | 10/250 [00:00<00:02, 94.11it/s, loss=8.760/8.737]

epoch:22


100%|██████████| 250/250 [00:02<00:00, 119.10it/s, loss=8.737/8.125]

8.73744777889252



  4%|▎         | 9/250 [00:00<00:02, 88.94it/s, loss=8.669/9.182]

epoch:23


100%|██████████| 250/250 [00:01<00:00, 127.56it/s, loss=8.570/8.196]

8.570184391937254



  4%|▍         | 10/250 [00:00<00:02, 97.92it/s, loss=8.413/8.295]

epoch:24


100%|██████████| 250/250 [00:01<00:00, 125.77it/s, loss=8.415/8.484]


8.414628829498287


100%|██████████| 50/50 [00:00<00:00, 124.10it/s]


lr:0.01; reg:0; acc:0.35


  2%|▏         | 6/250 [00:00<00:04, 59.96it/s, loss=19.167/18.455]

epoch:0


100%|██████████| 250/250 [00:02<00:00, 117.74it/s, loss=17.816/17.050]


17.815554900207513


  5%|▌         | 13/250 [00:00<00:02, 116.09it/s, loss=16.875/16.814]

epoch:1


100%|██████████| 250/250 [00:02<00:00, 124.62it/s, loss=16.267/15.873]

16.2671507395935



  4%|▍         | 10/250 [00:00<00:02, 99.22it/s, loss=15.679/15.545]

epoch:2


100%|██████████| 250/250 [00:02<00:00, 123.49it/s, loss=15.352/14.546]

15.351803334198003



  4%|▍         | 10/250 [00:00<00:02, 98.55it/s, loss=15.034/14.419]

epoch:3


100%|██████████| 250/250 [00:02<00:00, 120.65it/s, loss=14.670/14.062]

14.669615088043209



  5%|▍         | 12/250 [00:00<00:02, 114.68it/s, loss=14.444/14.898]

epoch:4


100%|██████████| 250/250 [00:02<00:00, 123.14it/s, loss=14.096/13.548]


14.095687815322872


  4%|▍         | 10/250 [00:00<00:02, 97.94it/s, loss=13.866/14.251]

epoch:5


100%|██████████| 250/250 [00:02<00:00, 121.20it/s, loss=13.585/13.280]

13.584698061141962



  4%|▎         | 9/250 [00:00<00:02, 88.82it/s, loss=13.429/13.204]

epoch:6


100%|██████████| 250/250 [00:02<00:00, 122.64it/s, loss=13.121/12.958]

13.120814876632693



  3%|▎         | 8/250 [00:00<00:03, 78.73it/s, loss=12.975/13.468]

epoch:7


100%|██████████| 250/250 [00:02<00:00, 124.36it/s, loss=12.695/12.388]


12.695199081573483


  4%|▍         | 11/250 [00:00<00:02, 109.21it/s, loss=12.410/12.669]

epoch:8


100%|██████████| 250/250 [00:02<00:00, 122.15it/s, loss=12.299/12.102]


12.299339632110602


  4%|▍         | 10/250 [00:00<00:02, 97.06it/s, loss=12.123/12.005]

epoch:9


100%|██████████| 250/250 [00:02<00:00, 123.93it/s, loss=11.932/11.142]


11.932037286682132


  4%|▍         | 10/250 [00:00<00:02, 99.29it/s, loss=11.815/12.076]

epoch:10


100%|██████████| 250/250 [00:02<00:00, 122.17it/s, loss=11.588/11.073]

11.588364110717773



  4%|▍         | 11/250 [00:00<00:02, 108.25it/s, loss=11.378/10.928]

epoch:11


100%|██████████| 250/250 [00:02<00:00, 115.60it/s, loss=11.264/11.060]


11.264279318084705


  4%|▍         | 10/250 [00:00<00:02, 96.48it/s, loss=11.083/10.841]

epoch:12


100%|██████████| 250/250 [00:01<00:00, 125.75it/s, loss=10.962/10.264]

10.962209168014528



  4%|▍         | 11/250 [00:00<00:02, 107.82it/s, loss=10.718/10.597]

epoch:13


100%|██████████| 250/250 [00:01<00:00, 125.18it/s, loss=10.677/9.965]

10.677119106140138



  4%|▎         | 9/250 [00:00<00:02, 87.90it/s, loss=10.585/10.645]

epoch:14


100%|██████████| 250/250 [00:02<00:00, 122.04it/s, loss=10.407/10.308]

10.40736568916321



  4%|▎         | 9/250 [00:00<00:02, 89.07it/s, loss=10.320/10.254]

epoch:15


100%|██████████| 250/250 [00:02<00:00, 124.46it/s, loss=10.152/9.818]

10.152180160598757



  4%|▎         | 9/250 [00:00<00:02, 87.85it/s, loss=10.006/9.442] 

epoch:16


100%|██████████| 250/250 [00:02<00:00, 119.21it/s, loss=9.912/9.854]

9.911775660705565



  4%|▍         | 11/250 [00:00<00:02, 105.42it/s, loss=9.819/9.695]

epoch:17


100%|██████████| 250/250 [00:02<00:00, 124.09it/s, loss=9.686/9.429]


9.686147310333261


  4%|▍         | 11/250 [00:00<00:02, 108.43it/s, loss=9.490/9.036]

epoch:18


100%|██████████| 250/250 [00:01<00:00, 125.74it/s, loss=9.471/9.548]

9.471020856246945



  4%|▍         | 11/250 [00:00<00:02, 107.80it/s, loss=9.376/9.722]

epoch:19


100%|██████████| 250/250 [00:02<00:00, 117.56it/s, loss=9.267/9.332]

9.267187007369992



  4%|▍         | 10/250 [00:00<00:02, 97.52it/s, loss=9.113/8.707]

epoch:20


100%|██████████| 250/250 [00:02<00:00, 124.58it/s, loss=9.074/9.159]

9.07420670124054



  4%|▍         | 10/250 [00:00<00:02, 98.77it/s, loss=8.999/9.593]

epoch:21


100%|██████████| 250/250 [00:01<00:00, 125.85it/s, loss=8.891/8.568]


8.890999304885868


  4%|▎         | 9/250 [00:00<00:02, 88.02it/s, loss=8.833/8.464]

epoch:22


100%|██████████| 250/250 [00:02<00:00, 122.33it/s, loss=8.719/8.996]


8.718575936126708


  4%|▍         | 10/250 [00:00<00:02, 97.46it/s, loss=8.664/8.993]

epoch:23


100%|██████████| 250/250 [00:02<00:00, 119.91it/s, loss=8.554/8.189]


8.55384531528473


  4%|▍         | 11/250 [00:00<00:02, 107.46it/s, loss=8.670/8.790]

epoch:24


100%|██████████| 250/250 [00:02<00:00, 122.15it/s, loss=8.397/8.337]


8.396745621376043


100%|██████████| 50/50 [00:00<00:00, 119.16it/s]

lr:0.01; reg:1; acc:0.35



  4%|▍         | 10/250 [00:00<00:02, 85.62it/s, loss=19.644/19.481]

epoch:0


100%|██████████| 250/250 [00:02<00:00, 121.84it/s, loss=17.969/16.801]

17.969020517578123



  5%|▍         | 12/250 [00:00<00:02, 118.74it/s, loss=16.873/16.700]

epoch:1


100%|██████████| 250/250 [00:01<00:00, 129.32it/s, loss=16.377/15.570]

16.377091684265135



  4%|▎         | 9/250 [00:00<00:02, 85.24it/s, loss=15.896/15.870]

epoch:2


100%|██████████| 250/250 [00:02<00:00, 121.99it/s, loss=15.439/14.289]


15.439497570953368


  4%|▍         | 10/250 [00:00<00:02, 98.39it/s, loss=15.102/14.895]

epoch:3


100%|██████████| 250/250 [00:01<00:00, 125.89it/s, loss=14.744/14.114]

14.744209224548337



  4%|▍         | 10/250 [00:00<00:02, 94.22it/s, loss=14.399/14.042]

epoch:4


100%|██████████| 250/250 [00:02<00:00, 127.99it/s, loss=14.160/14.170]


14.160371120529177


  4%|▍         | 10/250 [00:00<00:02, 96.87it/s, loss=13.837/13.768]

epoch:5


100%|██████████| 250/250 [00:02<00:00, 121.74it/s, loss=13.643/13.413]

13.643439978256223



  4%|▍         | 10/250 [00:00<00:02, 95.82it/s, loss=13.408/13.001]

epoch:6


100%|██████████| 250/250 [00:01<00:00, 126.54it/s, loss=13.176/12.886]

13.176308521728513



  4%|▍         | 10/250 [00:00<00:02, 96.69it/s, loss=13.035/12.776]

epoch:7


100%|██████████| 250/250 [00:02<00:00, 120.91it/s, loss=12.747/12.281]

12.74721038528442



  4%|▍         | 11/250 [00:00<00:02, 108.55it/s, loss=12.527/12.777]

epoch:8


100%|██████████| 250/250 [00:02<00:00, 124.65it/s, loss=12.349/12.204]

12.348974196624752



  4%|▍         | 11/250 [00:00<00:02, 105.74it/s, loss=12.121/12.077]

epoch:9


100%|██████████| 250/250 [00:01<00:00, 127.33it/s, loss=11.979/12.211]

11.979202127990714



  4%|▎         | 9/250 [00:00<00:02, 86.73it/s, loss=11.855/11.696]

epoch:10


100%|██████████| 250/250 [00:01<00:00, 125.35it/s, loss=11.632/11.460]


11.63214548751831


  4%|▍         | 10/250 [00:00<00:02, 96.61it/s, loss=11.533/11.678]

epoch:11


100%|██████████| 250/250 [00:02<00:00, 120.53it/s, loss=11.307/11.119]


11.307030237045288


  4%|▎         | 9/250 [00:00<00:02, 89.05it/s, loss=11.115/11.101]

epoch:12


100%|██████████| 250/250 [00:01<00:00, 132.81it/s, loss=11.002/11.101]

11.001533670196528



  4%|▎         | 9/250 [00:00<00:02, 89.03it/s, loss=10.900/10.879]

epoch:13


100%|██████████| 250/250 [00:02<00:00, 121.09it/s, loss=10.715/10.523]

10.71457227821351



  4%|▍         | 10/250 [00:00<00:02, 96.61it/s, loss=10.630/10.252]

epoch:14


100%|██████████| 250/250 [00:02<00:00, 124.07it/s, loss=10.442/10.351]


10.442303704071035


  4%|▎         | 9/250 [00:00<00:02, 89.36it/s, loss=10.242/10.240]

epoch:15


100%|██████████| 250/250 [00:02<00:00, 121.41it/s, loss=10.187/10.199]

10.18744029663086



  4%|▍         | 10/250 [00:00<00:02, 96.44it/s, loss=10.111/10.343]

epoch:16


100%|██████████| 250/250 [00:01<00:00, 125.81it/s, loss=9.944/9.997]

9.94431532180786



  4%|▍         | 10/250 [00:00<00:02, 96.37it/s, loss=9.745/9.650]

epoch:17


100%|██████████| 250/250 [00:02<00:00, 123.91it/s, loss=9.716/9.661]


9.715758679504392


  4%|▎         | 9/250 [00:00<00:02, 85.95it/s, loss=9.567/9.770]

epoch:18


100%|██████████| 250/250 [00:02<00:00, 120.07it/s, loss=9.499/9.556]

9.49941695266724



  4%|▍         | 10/250 [00:00<00:02, 99.72it/s, loss=9.302/9.030]

epoch:19


100%|██████████| 250/250 [00:02<00:00, 124.29it/s, loss=9.295/9.145]

9.294908153419486



  4%|▎         | 9/250 [00:00<00:02, 87.74it/s, loss=9.241/9.125]

epoch:20


100%|██████████| 250/250 [00:02<00:00, 118.68it/s, loss=9.100/9.518]

9.09957903896332



  4%|▎         | 9/250 [00:00<00:02, 89.87it/s, loss=8.921/8.912]

epoch:21


100%|██████████| 250/250 [00:02<00:00, 123.19it/s, loss=8.916/9.140]


8.916270267791747


  4%|▍         | 10/250 [00:00<00:02, 91.27it/s, loss=8.849/9.194]

epoch:22


100%|██████████| 250/250 [00:02<00:00, 122.42it/s, loss=8.742/9.105]


8.741738113861084


  4%|▍         | 11/250 [00:00<00:02, 106.59it/s, loss=8.637/8.619]

epoch:23


100%|██████████| 250/250 [00:02<00:00, 123.04it/s, loss=8.576/8.549]


8.576164241523744


  4%|▍         | 10/250 [00:00<00:02, 96.82it/s, loss=8.518/8.272]

epoch:24


100%|██████████| 250/250 [00:02<00:00, 124.48it/s, loss=8.418/8.312]


8.418285245666505


100%|██████████| 50/50 [00:00<00:00, 112.47it/s]

lr:0.01; reg:10; acc:0.3487



  4%|▎         | 9/250 [00:00<00:03, 77.26it/s, loss=19.302/19.268]

epoch:0


100%|██████████| 250/250 [00:02<00:00, 119.83it/s, loss=17.923/16.761]


17.9230865510559


  4%|▍         | 11/250 [00:00<00:02, 103.54it/s, loss=16.955/16.633]

epoch:1


100%|██████████| 250/250 [00:02<00:00, 120.77it/s, loss=16.266/15.695]

16.26553041168214



  4%|▎         | 9/250 [00:00<00:02, 84.98it/s, loss=15.698/15.638]

epoch:2


100%|██████████| 250/250 [00:02<00:00, 124.84it/s, loss=15.270/14.888]

15.26991767105103



  4%|▍         | 10/250 [00:00<00:02, 97.16it/s, loss=14.875/14.897]

epoch:3


100%|██████████| 250/250 [00:02<00:00, 121.12it/s, loss=14.561/14.336]

14.560876787719728



  5%|▍         | 12/250 [00:00<00:02, 114.49it/s, loss=14.302/13.822]

epoch:4


100%|██████████| 250/250 [00:02<00:00, 123.37it/s, loss=13.983/14.085]

13.983393429260252



  4%|▎         | 9/250 [00:00<00:02, 88.95it/s, loss=13.793/14.011]

epoch:5


100%|██████████| 250/250 [00:02<00:00, 119.59it/s, loss=13.476/12.644]

13.475838845596313



  4%|▍         | 10/250 [00:00<00:02, 97.71it/s, loss=13.312/12.845]

epoch:6


100%|██████████| 250/250 [00:02<00:00, 121.88it/s, loss=13.018/12.764]

13.01848428672791



  4%|▍         | 10/250 [00:00<00:02, 98.85it/s, loss=12.899/13.073]

epoch:7


100%|██████████| 250/250 [00:02<00:00, 123.17it/s, loss=12.600/12.540]

12.600032804183966



  4%|▍         | 11/250 [00:00<00:02, 108.53it/s, loss=12.229/11.619]

epoch:8


100%|██████████| 250/250 [00:01<00:00, 129.02it/s, loss=12.211/12.060]

12.210707184753415



  4%|▎         | 9/250 [00:00<00:02, 89.15it/s, loss=12.062/11.653]

epoch:9


100%|██████████| 250/250 [00:02<00:00, 118.54it/s, loss=11.849/11.391]


11.849416738967893


  4%|▎         | 9/250 [00:00<00:02, 89.84it/s, loss=11.661/11.352]

epoch:10


100%|██████████| 250/250 [00:02<00:00, 121.90it/s, loss=11.510/11.834]


11.510071171951296


  4%|▎         | 9/250 [00:00<00:02, 87.56it/s, loss=11.242/11.149]

epoch:11


100%|██████████| 250/250 [00:02<00:00, 123.05it/s, loss=11.192/10.933]

11.191961520156848



  4%|▍         | 11/250 [00:00<00:02, 103.76it/s, loss=11.112/10.635]

epoch:12


100%|██████████| 250/250 [00:01<00:00, 127.51it/s, loss=10.893/10.760]

10.89263427009582



  4%|▍         | 11/250 [00:00<00:02, 105.09it/s, loss=10.730/10.923]

epoch:13


100%|██████████| 250/250 [00:02<00:00, 120.36it/s, loss=10.611/10.633]

10.611241938858035



  4%|▍         | 10/250 [00:00<00:02, 99.36it/s, loss=10.394/10.013]

epoch:14


100%|██████████| 250/250 [00:02<00:00, 122.96it/s, loss=10.345/9.885]


10.345049800415048


  4%|▍         | 10/250 [00:00<00:02, 96.45it/s, loss=10.291/10.523]

epoch:15


100%|██████████| 250/250 [00:01<00:00, 126.68it/s, loss=10.093/10.497]


10.09336397888183


  4%|▍         | 11/250 [00:00<00:02, 104.98it/s, loss=10.012/9.880] 

epoch:16


100%|██████████| 250/250 [00:01<00:00, 126.04it/s, loss=9.857/10.084]

9.856569747695923



  2%|▏         | 5/250 [00:00<00:04, 49.49it/s, loss=9.785/9.889]

epoch:17


100%|██████████| 250/250 [00:02<00:00, 122.06it/s, loss=9.631/9.345]

9.630577645111076



  5%|▍         | 12/250 [00:00<00:02, 115.90it/s, loss=9.492/9.859]

epoch:18


100%|██████████| 250/250 [00:02<00:00, 121.46it/s, loss=9.418/9.419]

9.418150564422604



  4%|▍         | 10/250 [00:00<00:02, 95.19it/s, loss=9.308/9.041]

epoch:19


100%|██████████| 250/250 [00:02<00:00, 124.91it/s, loss=9.218/8.928]


9.218057455062864


  5%|▍         | 12/250 [00:00<00:02, 116.57it/s, loss=9.178/9.533]

epoch:20


100%|██████████| 250/250 [00:01<00:00, 129.54it/s, loss=9.027/9.196]

9.02686303646087



  4%|▎         | 9/250 [00:00<00:02, 83.84it/s, loss=8.884/8.916]

epoch:21


100%|██████████| 250/250 [00:01<00:00, 126.73it/s, loss=8.844/8.819]


8.844129444732669


  4%|▎         | 9/250 [00:00<00:02, 85.91it/s, loss=8.888/8.930]

epoch:22


100%|██████████| 250/250 [00:02<00:00, 123.89it/s, loss=8.675/8.219]

8.674747849121092



  4%|▍         | 10/250 [00:00<00:02, 98.19it/s, loss=8.694/8.937]

epoch:23


100%|██████████| 250/250 [00:02<00:00, 126.29it/s, loss=8.512/8.383]

8.511701023139956



  4%|▎         | 9/250 [00:00<00:02, 87.11it/s, loss=8.439/8.794]

epoch:24


100%|██████████| 250/250 [00:02<00:00, 120.65it/s, loss=8.358/8.555]


8.357806935462953


100%|██████████| 50/50 [00:00<00:00, 115.40it/s]

lr:0.01; reg:100; acc:0.3522



  4%|▎         | 9/250 [00:00<00:02, 84.99it/s, loss=19.236/18.851]

epoch:0


100%|██████████| 250/250 [00:01<00:00, 125.20it/s, loss=17.468/16.591]

17.468252722473146



  4%|▍         | 10/250 [00:00<00:02, 96.71it/s, loss=16.500/16.337]

epoch:1


100%|██████████| 250/250 [00:01<00:00, 126.10it/s, loss=16.083/15.550]


16.083373804321287


  4%|▎         | 9/250 [00:00<00:02, 87.45it/s, loss=15.639/15.621]

epoch:2


100%|██████████| 250/250 [00:02<00:00, 123.63it/s, loss=15.239/15.074]

15.239159704742429



  4%|▍         | 10/250 [00:00<00:02, 96.66it/s, loss=14.844/14.681]

epoch:3


100%|██████████| 250/250 [00:02<00:00, 122.76it/s, loss=14.578/13.791]

14.577643055725096



  4%|▍         | 10/250 [00:00<00:02, 97.81it/s, loss=14.250/14.285]

epoch:4


100%|██████████| 250/250 [00:01<00:00, 127.44it/s, loss=14.015/13.621]


14.015341282730104


  4%|▍         | 10/250 [00:00<00:02, 94.54it/s, loss=13.735/13.505]

epoch:5


100%|██████████| 250/250 [00:01<00:00, 126.39it/s, loss=13.515/14.041]


13.514606290130615


  4%|▍         | 11/250 [00:00<00:02, 105.09it/s, loss=13.236/13.397]

epoch:6


100%|██████████| 250/250 [00:02<00:00, 123.64it/s, loss=13.058/12.444]

13.058060020141607



  4%|▍         | 11/250 [00:00<00:02, 109.25it/s, loss=12.855/13.048]

epoch:7


100%|██████████| 250/250 [00:02<00:00, 125.94it/s, loss=12.638/12.182]

12.638117534713752



  5%|▍         | 12/250 [00:00<00:02, 114.44it/s, loss=12.476/12.318]

epoch:8


100%|██████████| 250/250 [00:01<00:00, 127.22it/s, loss=12.248/11.980]

12.247604243240362



  4%|▍         | 11/250 [00:00<00:02, 102.91it/s, loss=12.055/11.969]

epoch:9


100%|██████████| 250/250 [00:02<00:00, 120.61it/s, loss=11.885/11.468]

11.885130422210695



  4%|▍         | 10/250 [00:00<00:02, 96.57it/s, loss=11.657/11.919]

epoch:10


100%|██████████| 250/250 [00:01<00:00, 125.42it/s, loss=11.544/11.411]

11.543612541275026



  4%|▎         | 9/250 [00:00<00:02, 88.69it/s, loss=11.282/11.029]

epoch:11


100%|██████████| 250/250 [00:02<00:00, 121.20it/s, loss=11.224/11.546]


11.22448576576233


  4%|▎         | 9/250 [00:00<00:02, 88.08it/s, loss=11.008/10.582]

epoch:12


100%|██████████| 250/250 [00:02<00:00, 121.41it/s, loss=10.925/10.292]


10.925404503478996


  4%|▍         | 10/250 [00:00<00:02, 97.54it/s, loss=10.873/11.459]

epoch:13


100%|██████████| 250/250 [00:01<00:00, 125.14it/s, loss=10.641/10.268]


10.641123804855342


  4%|▍         | 10/250 [00:00<00:02, 96.74it/s, loss=10.448/10.863]

epoch:14


100%|██████████| 250/250 [00:01<00:00, 126.37it/s, loss=10.373/10.602]

10.373445298538208



  4%|▎         | 9/250 [00:00<00:02, 85.44it/s, loss=10.282/10.354]

epoch:15


100%|██████████| 250/250 [00:02<00:00, 123.08it/s, loss=10.122/10.390]

10.12238635894775



  4%|▍         | 11/250 [00:00<00:02, 108.42it/s, loss=10.022/10.226]

epoch:16


100%|██████████| 250/250 [00:02<00:00, 118.69it/s, loss=9.884/10.016]

9.88352467338562



  4%|▎         | 9/250 [00:00<00:02, 87.30it/s, loss=9.768/10.001]

epoch:17


100%|██████████| 250/250 [00:02<00:00, 118.63it/s, loss=9.658/9.824]

9.65782411117553



  4%|▍         | 11/250 [00:00<00:02, 109.10it/s, loss=9.438/9.021]

epoch:18


100%|██████████| 250/250 [00:02<00:00, 123.91it/s, loss=9.444/9.556]

9.443991833953858



  4%|▍         | 11/250 [00:00<00:02, 109.14it/s, loss=9.333/9.178]

epoch:19


100%|██████████| 250/250 [00:02<00:00, 121.90it/s, loss=9.243/9.073]

9.242528081741336



  5%|▍         | 12/250 [00:00<00:02, 116.64it/s, loss=9.116/9.115]

epoch:20


100%|██████████| 250/250 [00:01<00:00, 127.31it/s, loss=9.050/8.620]

9.049820881271362



  4%|▍         | 11/250 [00:00<00:02, 104.55it/s, loss=8.982/8.931]

epoch:21


100%|██████████| 250/250 [00:02<00:00, 123.67it/s, loss=8.869/8.055]

8.869377263755792



  4%|▎         | 9/250 [00:00<00:02, 84.33it/s, loss=8.673/8.389]

epoch:22


100%|██████████| 250/250 [00:01<00:00, 126.07it/s, loss=8.696/8.521]

8.69579874507904



  4%|▎         | 9/250 [00:00<00:02, 83.70it/s, loss=8.700/8.785]

epoch:23


100%|██████████| 250/250 [00:01<00:00, 127.60it/s, loss=8.533/8.338]


8.532903923301694


  4%|▎         | 9/250 [00:00<00:02, 88.37it/s, loss=8.589/8.596]

epoch:24


100%|██████████| 250/250 [00:01<00:00, 127.47it/s, loss=8.378/8.445]


8.37849558074951


100%|██████████| 50/50 [00:00<00:00, 129.35it/s]

lr:0.01; reg:1000; acc:0.3475



  4%|▎         | 9/250 [00:00<00:02, 88.71it/s, loss=19.567/19.562]

epoch:0


100%|██████████| 250/250 [00:02<00:00, 124.07it/s, loss=18.065/17.181]

18.065440831756597



  5%|▍         | 12/250 [00:00<00:02, 117.21it/s, loss=17.088/17.040]

epoch:1


100%|██████████| 250/250 [00:02<00:00, 120.51it/s, loss=16.425/15.664]

16.42518378341675



  4%|▎         | 9/250 [00:00<00:03, 76.56it/s, loss=15.864/15.898]

epoch:2


100%|██████████| 250/250 [00:02<00:00, 121.87it/s, loss=15.456/15.496]


15.456286075592045


  4%|▍         | 10/250 [00:00<00:02, 98.29it/s, loss=15.029/14.582]

epoch:3


100%|██████████| 250/250 [00:02<00:00, 124.08it/s, loss=14.752/14.521]


14.752382411041259


  4%|▍         | 11/250 [00:00<00:02, 107.73it/s, loss=14.549/14.482]

epoch:4


100%|██████████| 250/250 [00:02<00:00, 123.82it/s, loss=14.165/13.835]

14.164590626068115



  4%|▎         | 9/250 [00:00<00:02, 88.69it/s, loss=13.920/13.515]

epoch:5


100%|██████████| 250/250 [00:01<00:00, 126.83it/s, loss=13.646/13.564]

13.646297334213262



  4%|▍         | 10/250 [00:00<00:02, 97.98it/s, loss=13.436/13.498]

epoch:6


100%|██████████| 250/250 [00:02<00:00, 124.20it/s, loss=13.179/13.036]


13.178773640670775


  4%|▍         | 10/250 [00:00<00:02, 100.00it/s, loss=12.999/12.613]

epoch:7


100%|██████████| 250/250 [00:01<00:00, 128.52it/s, loss=12.750/12.648]

12.749676422195439



  4%|▍         | 11/250 [00:00<00:02, 102.10it/s, loss=12.429/12.402]

epoch:8


100%|██████████| 250/250 [00:02<00:00, 122.07it/s, loss=12.351/12.496]

12.350718756408687



  4%|▍         | 10/250 [00:00<00:02, 99.80it/s, loss=12.119/11.887]

epoch:9


100%|██████████| 250/250 [00:02<00:00, 124.60it/s, loss=11.980/12.255]

11.980370386810305



  4%|▍         | 11/250 [00:00<00:02, 104.09it/s, loss=11.809/12.035]

epoch:10


100%|██████████| 250/250 [00:02<00:00, 124.74it/s, loss=11.634/11.531]

11.63373385154725



  4%|▍         | 10/250 [00:00<00:02, 95.83it/s, loss=11.359/11.429]

epoch:11


100%|██████████| 250/250 [00:02<00:00, 123.76it/s, loss=11.308/11.089]

11.307634508590697



  4%|▍         | 10/250 [00:00<00:02, 96.69it/s, loss=11.079/10.694]

epoch:12


100%|██████████| 250/250 [00:02<00:00, 122.01it/s, loss=11.003/10.600]

11.00273172706604



  4%|▎         | 9/250 [00:00<00:02, 88.38it/s, loss=10.958/10.756]

epoch:13


100%|██████████| 250/250 [00:02<00:00, 118.93it/s, loss=10.714/10.592]


10.713924961776744


  4%|▎         | 9/250 [00:00<00:02, 81.64it/s, loss=10.573/10.637]

epoch:14


100%|██████████| 250/250 [00:02<00:00, 124.93it/s, loss=10.442/10.124]

10.442297568588257



  4%|▍         | 10/250 [00:00<00:02, 99.20it/s, loss=10.446/10.534]

epoch:15


100%|██████████| 250/250 [00:01<00:00, 128.71it/s, loss=10.185/9.831]


10.184775684585569


  4%|▎         | 9/250 [00:00<00:02, 89.47it/s, loss=10.111/9.990] 

epoch:16


100%|██████████| 250/250 [00:02<00:00, 122.53it/s, loss=9.944/9.944]

9.943885758209229



  4%|▍         | 10/250 [00:00<00:02, 96.50it/s, loss=9.654/9.540]

epoch:17


100%|██████████| 250/250 [00:02<00:00, 122.47it/s, loss=9.716/10.406]

9.715616761322023



  4%|▍         | 10/250 [00:00<00:02, 96.77it/s, loss=9.718/9.633]

epoch:18


100%|██████████| 250/250 [00:02<00:00, 129.28it/s, loss=9.499/9.093]


9.498930023040769


  4%|▍         | 10/250 [00:00<00:02, 96.51it/s, loss=9.386/9.264]

epoch:19


100%|██████████| 250/250 [00:02<00:00, 121.04it/s, loss=9.293/9.107]

9.293206131553653



  5%|▍         | 12/250 [00:00<00:02, 113.44it/s, loss=9.241/9.941]

epoch:20


100%|██████████| 250/250 [00:02<00:00, 124.80it/s, loss=9.098/8.978]


9.097881811218265


  4%|▎         | 9/250 [00:00<00:02, 87.77it/s, loss=8.942/8.792]

epoch:21


100%|██████████| 250/250 [00:02<00:00, 118.94it/s, loss=8.915/9.348]

8.914557628631597



  5%|▍         | 12/250 [00:00<00:01, 119.21it/s, loss=8.760/8.699]

epoch:22


100%|██████████| 250/250 [00:02<00:00, 124.82it/s, loss=8.740/9.115]


8.740255799903867


  5%|▍         | 12/250 [00:00<00:02, 117.99it/s, loss=8.721/8.665]

epoch:23


100%|██████████| 250/250 [00:01<00:00, 128.88it/s, loss=8.573/8.085]

8.57292573589325



  4%|▍         | 10/250 [00:00<00:02, 96.67it/s, loss=8.428/8.704]

epoch:24


100%|██████████| 250/250 [00:02<00:00, 120.76it/s, loss=8.417/8.392]


8.416777057037352


100%|██████████| 50/50 [00:00<00:00, 121.33it/s]


lr:0.001; reg:0; acc:0.3501


  4%|▎         | 9/250 [00:00<00:02, 84.94it/s, loss=18.988/18.893]

epoch:0


100%|██████████| 250/250 [00:02<00:00, 117.27it/s, loss=17.775/17.542]

17.775390431213378



  4%|▎         | 9/250 [00:00<00:02, 85.96it/s, loss=16.622/16.472]

epoch:1


100%|██████████| 250/250 [00:02<00:00, 118.30it/s, loss=16.190/15.924]


16.19020367370605


  4%|▍         | 11/250 [00:00<00:02, 106.08it/s, loss=15.618/15.936]

epoch:2


100%|██████████| 250/250 [00:02<00:00, 123.71it/s, loss=15.262/15.129]

15.26187125106812



  4%|▍         | 11/250 [00:00<00:02, 107.87it/s, loss=14.828/14.494]

epoch:3


100%|██████████| 250/250 [00:01<00:00, 127.16it/s, loss=14.572/14.235]

14.572403966064465



  4%|▍         | 10/250 [00:00<00:02, 96.81it/s, loss=14.243/14.082]

epoch:4


100%|██████████| 250/250 [00:02<00:00, 116.12it/s, loss=13.995/13.657]

13.995466187973019



  4%|▍         | 10/250 [00:00<00:02, 99.62it/s, loss=13.804/13.883]

epoch:5


100%|██████████| 250/250 [00:02<00:00, 127.29it/s, loss=13.488/13.384]


13.487838833770759


  4%|▎         | 9/250 [00:00<00:02, 88.85it/s, loss=13.183/13.052]

epoch:6


100%|██████████| 250/250 [00:02<00:00, 122.85it/s, loss=13.028/12.996]


13.028180516281113


  5%|▍         | 12/250 [00:00<00:02, 116.85it/s, loss=12.787/12.774]

epoch:7


100%|██████████| 250/250 [00:01<00:00, 125.20it/s, loss=12.606/12.134]

12.6064019708252



  4%|▍         | 10/250 [00:00<00:02, 96.90it/s, loss=12.404/11.927]

epoch:8


100%|██████████| 250/250 [00:01<00:00, 126.26it/s, loss=12.216/11.938]

12.215849483184806



  4%|▍         | 10/250 [00:00<00:02, 98.63it/s, loss=11.977/12.128]

epoch:9


100%|██████████| 250/250 [00:02<00:00, 121.49it/s, loss=11.853/11.238]

11.852835023651112



  4%|▍         | 10/250 [00:00<00:02, 94.47it/s, loss=11.711/11.513]

epoch:10


100%|██████████| 250/250 [00:02<00:00, 121.52it/s, loss=11.511/11.363]

11.51145655311584



  4%|▍         | 10/250 [00:00<00:02, 96.40it/s, loss=11.231/11.116]

epoch:11


100%|██████████| 250/250 [00:02<00:00, 123.14it/s, loss=11.195/11.005]

11.194531906051637



  4%|▍         | 10/250 [00:00<00:02, 95.89it/s, loss=11.069/10.912]

epoch:12


100%|██████████| 250/250 [00:02<00:00, 122.63it/s, loss=10.893/10.532]

10.893212919616692



  4%|▍         | 11/250 [00:00<00:02, 108.80it/s, loss=10.721/10.951]

epoch:13


100%|██████████| 250/250 [00:02<00:00, 123.11it/s, loss=10.611/10.821]

10.611330493316649



  4%|▎         | 9/250 [00:00<00:03, 78.83it/s, loss=10.360/10.229]

epoch:14


100%|██████████| 250/250 [00:01<00:00, 126.20it/s, loss=10.346/10.335]

10.346473124008172



  4%|▎         | 9/250 [00:00<00:02, 86.98it/s, loss=10.150/9.543]

epoch:15


100%|██████████| 250/250 [00:02<00:00, 121.42it/s, loss=10.094/9.529]

10.093899289093015



  4%|▍         | 10/250 [00:00<00:02, 97.12it/s, loss=9.934/9.832]

epoch:16


100%|██████████| 250/250 [00:01<00:00, 128.44it/s, loss=9.857/9.205]

9.857076516113285



  4%|▎         | 9/250 [00:00<00:02, 89.93it/s, loss=9.711/9.640]

epoch:17


100%|██████████| 250/250 [00:02<00:00, 118.07it/s, loss=9.633/9.845]


9.632610725097651


  4%|▍         | 10/250 [00:00<00:02, 98.46it/s, loss=9.568/9.662]

epoch:18


100%|██████████| 250/250 [00:01<00:00, 126.45it/s, loss=9.419/9.160]


9.419052073593136


  4%|▎         | 9/250 [00:00<00:02, 85.72it/s, loss=9.244/8.998]

epoch:19


100%|██████████| 250/250 [00:02<00:00, 122.21it/s, loss=9.220/9.038]

9.219737656555177



  4%|▍         | 11/250 [00:00<00:02, 109.72it/s, loss=9.215/9.364]

epoch:20


100%|██████████| 250/250 [00:02<00:00, 118.49it/s, loss=9.029/8.866]

9.028549888343811



  4%|▎         | 9/250 [00:00<00:02, 87.94it/s, loss=8.994/9.299]

epoch:21


100%|██████████| 250/250 [00:02<00:00, 129.85it/s, loss=8.846/9.032]

8.845897173004156



  4%|▎         | 9/250 [00:00<00:02, 88.22it/s, loss=8.788/8.798]

epoch:22


100%|██████████| 250/250 [00:02<00:00, 124.37it/s, loss=8.677/8.831]

8.676664601898196



  4%|▍         | 10/250 [00:00<00:02, 95.03it/s, loss=8.576/8.659]

epoch:23


100%|██████████| 250/250 [00:02<00:00, 120.08it/s, loss=8.514/8.421]

8.513885996475215



  4%|▎         | 9/250 [00:00<00:02, 89.79it/s, loss=8.455/8.146]

epoch:24


100%|██████████| 250/250 [00:01<00:00, 127.09it/s, loss=8.360/8.338]


8.35952166179657


100%|██████████| 50/50 [00:00<00:00, 108.54it/s]

lr:0.001; reg:1; acc:0.3514



  4%|▍         | 10/250 [00:00<00:02, 97.76it/s, loss=20.168/19.552]

epoch:0


100%|██████████| 250/250 [00:01<00:00, 125.14it/s, loss=18.034/17.202]


18.033984776916498


  5%|▍         | 12/250 [00:00<00:02, 116.06it/s, loss=16.988/17.194]

epoch:1


100%|██████████| 250/250 [00:02<00:00, 117.16it/s, loss=16.352/15.624]

16.352124181213384



  4%|▍         | 10/250 [00:00<00:02, 94.93it/s, loss=15.827/15.890]

epoch:2


100%|██████████| 250/250 [00:02<00:00, 119.29it/s, loss=15.384/15.362]

15.384278295898437



  5%|▍         | 12/250 [00:00<00:02, 114.08it/s, loss=14.964/14.986]

epoch:3


100%|██████████| 250/250 [00:01<00:00, 125.62it/s, loss=14.679/14.644]


14.679037061614984


  4%|▎         | 9/250 [00:00<00:02, 89.46it/s, loss=14.322/13.972]

epoch:4


100%|██████████| 250/250 [00:02<00:00, 118.33it/s, loss=14.096/13.897]


14.09616523162842


  4%|▍         | 10/250 [00:00<00:02, 98.17it/s, loss=13.791/13.645]

epoch:5


100%|██████████| 250/250 [00:01<00:00, 125.47it/s, loss=13.581/13.387]

13.581374652557367



  4%|▎         | 9/250 [00:00<00:02, 85.86it/s, loss=13.383/12.803]

epoch:6


100%|██████████| 250/250 [00:02<00:00, 122.60it/s, loss=13.118/12.939]

13.118391951065059



  4%|▎         | 9/250 [00:00<00:02, 89.43it/s, loss=12.818/13.038]

epoch:7


100%|██████████| 250/250 [00:02<00:00, 124.56it/s, loss=12.695/12.702]


12.694534031677245


  4%|▍         | 11/250 [00:00<00:02, 105.90it/s, loss=12.499/12.539]

epoch:8


100%|██████████| 250/250 [00:02<00:00, 123.47it/s, loss=12.300/12.305]


12.299558442230227


  4%|▍         | 10/250 [00:00<00:02, 94.58it/s, loss=12.154/11.988]

epoch:9


100%|██████████| 250/250 [00:02<00:00, 124.85it/s, loss=11.933/11.564]

11.933209064941408



  4%|▎         | 9/250 [00:00<00:02, 84.38it/s, loss=11.659/11.560]

epoch:10


100%|██████████| 250/250 [00:02<00:00, 124.86it/s, loss=11.591/11.597]


11.591495243759159


  4%|▍         | 10/250 [00:00<00:02, 88.90it/s, loss=11.410/11.485]

epoch:11


100%|██████████| 250/250 [00:02<00:00, 124.79it/s, loss=11.269/11.163]

11.269331640014645



  4%|▍         | 11/250 [00:00<00:02, 109.00it/s, loss=11.185/11.063]

epoch:12


100%|██████████| 250/250 [00:02<00:00, 121.31it/s, loss=10.968/10.843]

10.967743962478643



  4%|▎         | 9/250 [00:00<00:02, 88.25it/s, loss=10.708/10.795]

epoch:13


100%|██████████| 250/250 [00:02<00:00, 120.90it/s, loss=10.682/10.708]

10.682146774215695



  4%|▎         | 9/250 [00:00<00:02, 86.01it/s, loss=10.567/10.317]

epoch:14


100%|██████████| 250/250 [00:02<00:00, 121.51it/s, loss=10.414/10.090]


10.413652985076906


  4%|▍         | 10/250 [00:00<00:02, 96.91it/s, loss=10.262/10.626]

epoch:15


100%|██████████| 250/250 [00:02<00:00, 120.05it/s, loss=10.160/9.763]


10.159738474807734


  4%|▍         | 10/250 [00:00<00:02, 99.35it/s, loss=10.050/9.917] 

epoch:16


100%|██████████| 250/250 [00:02<00:00, 123.59it/s, loss=9.919/9.821]


9.919415574264526


  4%|▎         | 9/250 [00:00<00:02, 87.18it/s, loss=9.891/9.789]

epoch:17


100%|██████████| 250/250 [00:02<00:00, 124.54it/s, loss=9.693/9.770]

9.69307523506165



  4%|▍         | 10/250 [00:00<00:02, 97.60it/s, loss=9.626/9.471]

epoch:18


100%|██████████| 250/250 [00:02<00:00, 121.71it/s, loss=9.478/9.202]

9.4776422203064



  4%|▎         | 9/250 [00:00<00:02, 85.11it/s, loss=9.198/9.330]

epoch:19


100%|██████████| 250/250 [00:02<00:00, 122.64it/s, loss=9.275/8.973]

9.274587596549988



  4%|▎         | 9/250 [00:00<00:02, 89.56it/s, loss=9.239/9.174]

epoch:20


100%|██████████| 250/250 [00:02<00:00, 124.39it/s, loss=9.082/8.511]

9.081513558807378



  4%|▎         | 9/250 [00:00<00:02, 88.08it/s, loss=8.936/8.479]

epoch:21


100%|██████████| 250/250 [00:02<00:00, 121.86it/s, loss=8.897/8.816]


8.89739621234894


  4%|▍         | 11/250 [00:00<00:02, 105.73it/s, loss=8.778/8.477]

epoch:22


100%|██████████| 250/250 [00:02<00:00, 124.00it/s, loss=8.725/8.764]

8.724506201362608



  4%|▍         | 10/250 [00:00<00:02, 99.60it/s, loss=8.588/8.832]

epoch:23


100%|██████████| 250/250 [00:01<00:00, 125.57it/s, loss=8.560/8.416]


8.559902980346685


  4%|▍         | 10/250 [00:00<00:02, 97.98it/s, loss=8.383/7.986]

epoch:24


100%|██████████| 250/250 [00:02<00:00, 118.94it/s, loss=8.404/8.378]


8.403949065551762


100%|██████████| 50/50 [00:00<00:00, 127.41it/s]

lr:0.001; reg:10; acc:0.3472



  4%|▍         | 10/250 [00:00<00:02, 94.56it/s, loss=19.242/19.000]

epoch:0


100%|██████████| 250/250 [00:02<00:00, 122.86it/s, loss=17.817/16.464]

17.81687296630861



  4%|▍         | 11/250 [00:00<00:02, 108.12it/s, loss=16.753/16.871]

epoch:1


100%|██████████| 250/250 [00:01<00:00, 125.37it/s, loss=16.298/15.707]


16.298202842559814


  4%|▍         | 11/250 [00:00<00:02, 103.35it/s, loss=15.631/15.561]

epoch:2


100%|██████████| 250/250 [00:01<00:00, 125.44it/s, loss=15.367/15.091]


15.366793800048828


  5%|▍         | 12/250 [00:00<00:02, 115.97it/s, loss=15.025/15.330]

epoch:3


100%|██████████| 250/250 [00:02<00:00, 126.16it/s, loss=14.671/14.528]

14.670603415527346



  4%|▍         | 10/250 [00:00<00:02, 97.36it/s, loss=14.263/14.001]

epoch:4


100%|██████████| 250/250 [00:02<00:00, 121.91it/s, loss=14.090/13.616]


14.08967500144957


  4%|▍         | 11/250 [00:00<00:02, 105.68it/s, loss=13.832/13.813]

epoch:5


100%|██████████| 250/250 [00:01<00:00, 125.27it/s, loss=13.579/13.202]


13.579432745590205


  4%|▍         | 11/250 [00:00<00:02, 108.79it/s, loss=13.318/13.385]

epoch:6


100%|██████████| 250/250 [00:01<00:00, 125.68it/s, loss=13.119/13.290]


13.119154817428582


  4%|▎         | 9/250 [00:00<00:02, 88.69it/s, loss=12.900/13.230]

epoch:7


100%|██████████| 250/250 [00:02<00:00, 117.39it/s, loss=12.694/12.140]

12.694115006256094



  4%|▍         | 11/250 [00:00<00:02, 109.05it/s, loss=12.435/12.252]

epoch:8


100%|██████████| 250/250 [00:02<00:00, 119.89it/s, loss=12.301/12.150]


12.301462850646963


  4%|▍         | 10/250 [00:00<00:02, 99.80it/s, loss=12.048/11.995]

epoch:9


100%|██████████| 250/250 [00:01<00:00, 126.24it/s, loss=11.935/12.022]


11.934643515930178


  4%|▍         | 10/250 [00:00<00:02, 94.15it/s, loss=11.723/12.205]

epoch:10


100%|██████████| 250/250 [00:02<00:00, 122.27it/s, loss=11.593/11.623]


11.593101393737799


  4%|▎         | 9/250 [00:00<00:02, 84.79it/s, loss=11.426/11.157]

epoch:11


100%|██████████| 250/250 [00:02<00:00, 117.94it/s, loss=11.271/11.185]


11.27078248687744


  4%|▍         | 10/250 [00:00<00:02, 99.48it/s, loss=11.188/11.616]

epoch:12


100%|██████████| 250/250 [00:02<00:00, 121.49it/s, loss=10.968/10.573]

10.967574376678469



  4%|▍         | 10/250 [00:00<00:02, 95.22it/s, loss=10.697/10.988]

epoch:13


100%|██████████| 250/250 [00:02<00:00, 119.08it/s, loss=10.683/10.048]

10.682704357070918



  4%|▎         | 9/250 [00:00<00:02, 89.54it/s, loss=10.572/10.258]

epoch:14


100%|██████████| 250/250 [00:02<00:00, 122.36it/s, loss=10.413/10.345]

10.41335864173889



  4%|▍         | 11/250 [00:00<00:02, 108.42it/s, loss=10.345/10.827]

epoch:15


100%|██████████| 250/250 [00:02<00:00, 122.99it/s, loss=10.160/10.495]

10.160029200973504



  4%|▍         | 11/250 [00:00<00:02, 107.64it/s, loss=9.992/10.100]

epoch:16


100%|██████████| 250/250 [00:02<00:00, 123.85it/s, loss=9.920/9.889]

9.920095159454343



  4%|▍         | 10/250 [00:00<00:02, 99.01it/s, loss=9.790/9.737]

epoch:17


100%|██████████| 250/250 [00:01<00:00, 129.99it/s, loss=9.692/9.608]

9.692121521682738



  4%|▍         | 10/250 [00:00<00:02, 98.03it/s, loss=9.627/9.469]

epoch:18


100%|██████████| 250/250 [00:02<00:00, 128.25it/s, loss=9.476/9.458]

9.475833778305054



  4%|▍         | 11/250 [00:00<00:02, 104.22it/s, loss=9.383/9.212]

epoch:19


100%|██████████| 250/250 [00:02<00:00, 120.64it/s, loss=9.273/8.887]

9.272815906486512



  4%|▍         | 10/250 [00:00<00:02, 96.96it/s, loss=9.182/9.034]

epoch:20


100%|██████████| 250/250 [00:02<00:00, 123.45it/s, loss=9.079/9.002]


9.078818856201172


  4%|▍         | 11/250 [00:00<00:02, 108.23it/s, loss=8.971/8.613]

epoch:21


100%|██████████| 250/250 [00:01<00:00, 126.44it/s, loss=8.896/8.167]

8.896321473464964



  4%|▍         | 10/250 [00:00<00:02, 97.17it/s, loss=8.754/9.008]

epoch:22


100%|██████████| 250/250 [00:02<00:00, 126.50it/s, loss=8.722/9.003]


8.722133765907289


  4%|▍         | 11/250 [00:00<00:02, 105.96it/s, loss=8.656/8.359]

epoch:23


100%|██████████| 250/250 [00:02<00:00, 119.41it/s, loss=8.557/8.622]

8.556841768455508



  5%|▍         | 12/250 [00:00<00:02, 114.24it/s, loss=8.465/8.748]

epoch:24


100%|██████████| 250/250 [00:01<00:00, 127.62it/s, loss=8.401/8.337]

8.400782792434693



100%|██████████| 50/50 [00:00<00:00, 131.65it/s]

lr:0.001; reg:100; acc:0.3473



  4%|▎         | 9/250 [00:00<00:03, 78.06it/s, loss=19.465/19.184]

epoch:0


100%|██████████| 250/250 [00:02<00:00, 123.02it/s, loss=18.115/17.492]


18.114575650177027


  4%|▎         | 9/250 [00:00<00:02, 85.94it/s, loss=17.101/17.111]

epoch:1


100%|██████████| 250/250 [00:02<00:00, 120.40it/s, loss=16.470/15.569]

16.469669286956783



  4%|▍         | 10/250 [00:00<00:02, 98.08it/s, loss=15.710/15.513]

epoch:2


100%|██████████| 250/250 [00:02<00:00, 122.78it/s, loss=15.458/14.820]

15.458016494903571



  4%|▍         | 11/250 [00:00<00:02, 107.86it/s, loss=15.108/14.988]

epoch:3


100%|██████████| 250/250 [00:02<00:00, 120.62it/s, loss=14.725/14.444]

14.72525070083619



  4%|▍         | 10/250 [00:00<00:02, 95.24it/s, loss=14.445/14.845]

epoch:4


100%|██████████| 250/250 [00:02<00:00, 124.73it/s, loss=14.128/13.856]

14.128181697082516



  4%|▍         | 10/250 [00:00<00:02, 95.56it/s, loss=13.815/13.827]

epoch:5


100%|██████████| 250/250 [00:02<00:00, 117.08it/s, loss=13.607/12.952]


13.606831325836183


  4%|▍         | 11/250 [00:00<00:02, 105.92it/s, loss=13.318/13.381]

epoch:6


100%|██████████| 250/250 [00:02<00:00, 121.29it/s, loss=13.140/13.219]


13.139517430572502


  4%|▍         | 10/250 [00:00<00:02, 99.37it/s, loss=12.802/13.236]

epoch:7


100%|██████████| 250/250 [00:02<00:00, 122.81it/s, loss=12.712/12.665]


12.711506335906988


  4%|▍         | 10/250 [00:00<00:02, 96.57it/s, loss=12.449/11.986]

epoch:8


100%|██████████| 250/250 [00:02<00:00, 124.04it/s, loss=12.316/11.935]

12.315891173706055



  5%|▍         | 12/250 [00:00<00:02, 116.39it/s, loss=12.066/12.167]

epoch:9


100%|██████████| 250/250 [00:01<00:00, 125.89it/s, loss=11.948/11.927]

11.94785441749573



  4%|▍         | 11/250 [00:00<00:02, 106.23it/s, loss=11.821/12.074]

epoch:10


100%|██████████| 250/250 [00:02<00:00, 124.99it/s, loss=11.605/11.622]


11.60477532211304


  4%|▍         | 10/250 [00:00<00:02, 96.29it/s, loss=11.452/11.975]

epoch:11


100%|██████████| 250/250 [00:02<00:00, 121.40it/s, loss=11.283/11.474]

11.282633854141231



  4%|▎         | 9/250 [00:00<00:03, 79.09it/s, loss=11.087/11.241]

epoch:12


100%|██████████| 250/250 [00:02<00:00, 122.09it/s, loss=10.978/10.895]

10.97761380020142



  4%|▍         | 10/250 [00:00<00:02, 97.39it/s, loss=10.816/10.595]

epoch:13


100%|██████████| 250/250 [00:02<00:00, 128.62it/s, loss=10.693/10.288]

10.692684843139658



  4%|▍         | 10/250 [00:00<00:02, 97.45it/s, loss=10.430/10.529]

epoch:14


100%|██████████| 250/250 [00:02<00:00, 130.10it/s, loss=10.423/10.350]

10.42316743003846



  4%|▍         | 10/250 [00:00<00:02, 96.48it/s, loss=10.312/10.440]

epoch:15


100%|██████████| 250/250 [00:02<00:00, 127.86it/s, loss=10.169/9.752]

10.168938097534179



  4%|▍         | 11/250 [00:00<00:02, 109.58it/s, loss=10.084/10.888]

epoch:16


100%|██████████| 250/250 [00:01<00:00, 125.49it/s, loss=9.928/9.791]

9.927527895355226



  4%|▍         | 10/250 [00:00<00:02, 92.78it/s, loss=9.756/9.710]

epoch:17


100%|██████████| 250/250 [00:01<00:00, 127.30it/s, loss=9.700/9.322]

9.700439699630735



  4%|▍         | 10/250 [00:00<00:02, 97.05it/s, loss=9.518/9.714]

epoch:18


100%|██████████| 250/250 [00:02<00:00, 124.87it/s, loss=9.484/9.283]

9.483954188613897



  4%|▎         | 9/250 [00:00<00:02, 88.70it/s, loss=9.582/9.186]

epoch:19


100%|██████████| 250/250 [00:02<00:00, 118.95it/s, loss=9.281/9.144]


9.28097697544097


  4%|▍         | 11/250 [00:00<00:02, 108.86it/s, loss=9.164/9.212]

epoch:20


100%|██████████| 250/250 [00:01<00:00, 127.52it/s, loss=9.087/9.139]


9.087054932556153


  4%|▍         | 10/250 [00:00<00:02, 98.55it/s, loss=9.028/8.684]

epoch:21


100%|██████████| 250/250 [00:02<00:00, 124.36it/s, loss=8.905/8.817]

8.90503146087646



  4%|▍         | 10/250 [00:00<00:02, 95.08it/s, loss=8.796/8.761]

epoch:22


100%|██████████| 250/250 [00:01<00:00, 126.65it/s, loss=8.731/8.404]

8.731385725173954



  4%|▍         | 10/250 [00:00<00:02, 98.86it/s, loss=8.661/8.001]

epoch:23


100%|██████████| 250/250 [00:02<00:00, 120.69it/s, loss=8.567/8.920]

8.56728227005005



  4%|▎         | 9/250 [00:00<00:02, 88.28it/s, loss=8.438/8.235]

epoch:24


100%|██████████| 250/250 [00:02<00:00, 123.24it/s, loss=8.410/8.567]


8.410364233436594


100%|██████████| 50/50 [00:00<00:00, 121.86it/s]

lr:0.001; reg:1000; acc:0.3526



  4%|▍         | 10/250 [00:00<00:02, 94.20it/s, loss=20.108/19.610]

epoch:0


100%|██████████| 250/250 [00:01<00:00, 126.84it/s, loss=18.337/17.402]

18.337411482086186



  4%|▍         | 10/250 [00:00<00:02, 97.87it/s, loss=17.348/17.216]

epoch:1


100%|██████████| 250/250 [00:02<00:00, 124.68it/s, loss=16.621/15.561]


16.620941335754388


  5%|▍         | 12/250 [00:00<00:02, 115.75it/s, loss=15.977/16.140]

epoch:2


100%|██████████| 250/250 [00:02<00:00, 121.89it/s, loss=15.589/15.095]

15.588926910705581



  4%|▎         | 9/250 [00:00<00:02, 87.05it/s, loss=15.185/14.837]

epoch:3


100%|██████████| 250/250 [00:02<00:00, 121.09it/s, loss=14.843/14.489]

14.843272276763916



  4%|▍         | 11/250 [00:00<00:02, 109.41it/s, loss=14.428/14.646]

epoch:4


100%|██████████| 250/250 [00:02<00:00, 119.08it/s, loss=14.237/14.240]


14.236511812210079


  4%|▍         | 10/250 [00:00<00:02, 98.09it/s, loss=13.808/13.759]

epoch:5


100%|██████████| 250/250 [00:02<00:00, 119.60it/s, loss=13.708/13.887]

13.708221221618656



  5%|▍         | 12/250 [00:00<00:02, 115.23it/s, loss=13.398/13.294]

epoch:6


100%|██████████| 250/250 [00:01<00:00, 126.30it/s, loss=13.233/12.966]


13.232530675125126


  4%|▍         | 11/250 [00:00<00:02, 105.60it/s, loss=12.883/12.480]

epoch:7


100%|██████████| 250/250 [00:02<00:00, 122.87it/s, loss=12.797/12.269]


12.797113848190298


  4%|▎         | 9/250 [00:00<00:03, 76.44it/s, loss=12.448/11.992]

epoch:8


100%|██████████| 250/250 [00:02<00:00, 123.18it/s, loss=12.393/12.453]

12.392867622680665



  4%|▎         | 9/250 [00:00<00:02, 87.25it/s, loss=12.228/12.010]

epoch:9


100%|██████████| 250/250 [00:02<00:00, 123.78it/s, loss=12.021/11.682]

12.021069564895635



  4%|▍         | 10/250 [00:00<00:02, 98.67it/s, loss=11.837/11.639]

epoch:10


100%|██████████| 250/250 [00:02<00:00, 119.98it/s, loss=11.671/11.727]

11.670883327560425



  4%|▍         | 11/250 [00:00<00:02, 104.65it/s, loss=11.535/11.689]

epoch:11


100%|██████████| 250/250 [00:02<00:00, 124.52it/s, loss=11.344/11.114]

11.344246806869512



  4%|▍         | 10/250 [00:00<00:02, 96.44it/s, loss=11.122/11.203]

epoch:12


100%|██████████| 250/250 [00:02<00:00, 124.09it/s, loss=11.037/11.032]

11.036515840377806



  4%|▍         | 11/250 [00:00<00:02, 105.48it/s, loss=10.907/10.949]

epoch:13


100%|██████████| 250/250 [00:01<00:00, 126.62it/s, loss=10.747/10.441]


10.746897159729002


  4%|▎         | 9/250 [00:00<00:02, 89.30it/s, loss=10.711/10.297]

epoch:14


100%|██████████| 250/250 [00:02<00:00, 129.16it/s, loss=10.473/10.499]

10.473347751693726



  4%|▎         | 9/250 [00:00<00:02, 86.27it/s, loss=10.254/9.906] 

epoch:15


100%|██████████| 250/250 [00:02<00:00, 123.74it/s, loss=10.216/9.537]


10.215916007995597


  4%|▍         | 10/250 [00:00<00:02, 93.91it/s, loss=9.998/9.869] 

epoch:16


100%|██████████| 250/250 [00:01<00:00, 125.87it/s, loss=9.972/10.205]


9.971601231613146


  4%|▍         | 11/250 [00:00<00:02, 107.04it/s, loss=9.837/9.731] 

epoch:17


100%|██████████| 250/250 [00:02<00:00, 124.92it/s, loss=9.742/10.022]

9.741800006866459



  4%|▍         | 10/250 [00:00<00:02, 96.60it/s, loss=9.634/9.892]

epoch:18


100%|██████████| 250/250 [00:02<00:00, 124.97it/s, loss=9.524/9.145]

9.523716254730227



  4%|▎         | 9/250 [00:00<00:02, 88.72it/s, loss=9.456/9.727]

epoch:19


100%|██████████| 250/250 [00:01<00:00, 126.39it/s, loss=9.317/9.236]

9.317201367492673



  4%|▍         | 10/250 [00:00<00:02, 97.05it/s, loss=9.251/9.435]

epoch:20


100%|██████████| 250/250 [00:02<00:00, 123.66it/s, loss=9.122/9.008]


9.121964575271605


  4%|▍         | 11/250 [00:00<00:02, 108.22it/s, loss=9.135/9.144]

epoch:21


100%|██████████| 250/250 [00:02<00:00, 121.40it/s, loss=8.937/8.627]

8.937366710052487



  4%|▍         | 10/250 [00:00<00:02, 98.79it/s, loss=8.760/8.443]

epoch:22


100%|██████████| 250/250 [00:02<00:00, 123.20it/s, loss=8.761/8.700]

8.760698606567379



  4%|▍         | 10/250 [00:00<00:02, 98.47it/s, loss=8.626/8.313]

epoch:23


100%|██████████| 250/250 [00:02<00:00, 118.44it/s, loss=8.594/8.849]


8.594089012298586


  4%|▎         | 9/250 [00:00<00:02, 86.39it/s, loss=8.454/8.486]

epoch:24


100%|██████████| 250/250 [00:02<00:00, 124.88it/s, loss=8.436/8.388]


8.435818824882505


100%|██████████| 50/50 [00:00<00:00, 123.03it/s]


lr:0.0001; reg:0; acc:0.3471


  4%|▍         | 10/250 [00:00<00:02, 93.25it/s, loss=19.617/19.156]

epoch:0


100%|██████████| 250/250 [00:02<00:00, 124.67it/s, loss=17.979/16.972]

17.9791334576416



  5%|▍         | 12/250 [00:00<00:02, 114.42it/s, loss=16.987/17.130]

epoch:1


100%|██████████| 250/250 [00:02<00:00, 122.65it/s, loss=16.342/15.441]

16.341670061492923



  4%|▍         | 10/250 [00:00<00:02, 97.98it/s, loss=15.719/15.520]

epoch:2


100%|██████████| 250/250 [00:01<00:00, 125.68it/s, loss=15.391/14.870]

15.39107568099976



  4%|▍         | 10/250 [00:00<00:02, 98.14it/s, loss=14.975/14.658]

epoch:3


100%|██████████| 250/250 [00:02<00:00, 124.32it/s, loss=14.694/14.105]

14.694306156921387



  4%|▎         | 9/250 [00:00<00:02, 89.72it/s, loss=14.372/14.740]

epoch:4


100%|██████████| 250/250 [00:02<00:00, 124.89it/s, loss=14.113/13.807]


14.113131959304816


  4%|▍         | 10/250 [00:00<00:02, 96.42it/s, loss=13.921/14.045]

epoch:5


100%|██████████| 250/250 [00:02<00:00, 122.34it/s, loss=13.604/13.154]

13.604191775588989



  4%|▍         | 11/250 [00:00<00:02, 108.48it/s, loss=13.383/13.077]

epoch:6


100%|██████████| 250/250 [00:02<00:00, 121.34it/s, loss=13.142/13.087]

13.141692460250837



  5%|▍         | 12/250 [00:00<00:01, 119.05it/s, loss=13.006/13.291]

epoch:7


100%|██████████| 250/250 [00:01<00:00, 126.34it/s, loss=12.718/12.851]

12.718229339065555



  4%|▎         | 9/250 [00:00<00:02, 89.21it/s, loss=12.556/12.612]

epoch:8


100%|██████████| 250/250 [00:01<00:00, 125.52it/s, loss=12.323/12.346]

12.323397122192388



  4%|▎         | 9/250 [00:00<00:02, 89.49it/s, loss=12.113/12.307]

epoch:9


100%|██████████| 250/250 [00:02<00:00, 125.94it/s, loss=11.956/12.302]

11.956430700759899



  5%|▍         | 12/250 [00:00<00:02, 114.62it/s, loss=11.835/12.071]

epoch:10


100%|██████████| 250/250 [00:02<00:00, 121.69it/s, loss=11.613/11.104]


11.612928320846562


  4%|▍         | 10/250 [00:00<00:02, 99.21it/s, loss=11.394/11.376]

epoch:11


100%|██████████| 250/250 [00:02<00:00, 121.71it/s, loss=11.291/10.943]

11.290728893661498



  4%|▎         | 9/250 [00:00<00:02, 86.90it/s, loss=11.237/11.285]

epoch:12


100%|██████████| 250/250 [00:02<00:00, 122.10it/s, loss=10.987/10.725]


10.98683940368652


  4%|▎         | 9/250 [00:00<00:02, 86.84it/s, loss=10.781/10.862]

epoch:13


100%|██████████| 250/250 [00:01<00:00, 127.16it/s, loss=10.699/10.614]

10.699194474792485



  4%|▎         | 9/250 [00:00<00:02, 88.51it/s, loss=10.462/10.578]

epoch:14


100%|██████████| 250/250 [00:01<00:00, 125.35it/s, loss=10.429/9.938]

10.428850740432742



  4%|▍         | 11/250 [00:00<00:02, 103.98it/s, loss=10.258/10.422]

epoch:15


100%|██████████| 250/250 [00:02<00:00, 124.97it/s, loss=10.174/10.087]

10.173797702102659



  4%|▍         | 10/250 [00:00<00:02, 96.70it/s, loss=10.000/10.484]

epoch:16


100%|██████████| 250/250 [00:01<00:00, 126.60it/s, loss=9.934/9.582]

9.934379642257689



  4%|▍         | 11/250 [00:00<00:02, 105.64it/s, loss=9.814/9.623]

epoch:17


100%|██████████| 250/250 [00:01<00:00, 126.87it/s, loss=9.705/9.131]

9.704915447845456



  4%|▍         | 10/250 [00:00<00:02, 97.64it/s, loss=9.496/9.222]

epoch:18


100%|██████████| 250/250 [00:02<00:00, 123.69it/s, loss=9.489/9.718]

9.489214685974122



  4%|▍         | 11/250 [00:00<00:02, 106.68it/s, loss=9.399/9.574]

epoch:19


100%|██████████| 250/250 [00:02<00:00, 121.95it/s, loss=9.285/9.274]


9.284805415344243


  4%|▎         | 9/250 [00:00<00:02, 88.31it/s, loss=9.102/8.931]

epoch:20


100%|██████████| 250/250 [00:02<00:00, 118.52it/s, loss=9.089/8.820]


9.08877033294678


  4%|▍         | 11/250 [00:00<00:02, 104.80it/s, loss=8.974/8.591]

epoch:21


100%|██████████| 250/250 [00:02<00:00, 122.79it/s, loss=8.907/8.875]

8.906654767799376



  4%|▍         | 10/250 [00:00<00:02, 96.42it/s, loss=8.784/8.671]

epoch:22


100%|██████████| 250/250 [00:01<00:00, 128.50it/s, loss=8.733/8.648]

8.732601161270132



  4%|▎         | 9/250 [00:00<00:02, 83.79it/s, loss=8.766/8.802]

epoch:23


100%|██████████| 250/250 [00:02<00:00, 123.19it/s, loss=8.567/8.942]


8.567088791236875


  4%|▍         | 10/250 [00:00<00:02, 99.72it/s, loss=8.398/7.862]

epoch:24


100%|██████████| 250/250 [00:01<00:00, 127.19it/s, loss=8.411/8.514]


8.411409208297728


100%|██████████| 50/50 [00:00<00:00, 115.79it/s]

lr:0.0001; reg:1; acc:0.3498



  4%|▎         | 9/250 [00:00<00:03, 78.21it/s, loss=19.699/19.130]

epoch:0


100%|██████████| 250/250 [00:02<00:00, 120.43it/s, loss=17.843/16.336]

17.843364069061273



  4%|▎         | 9/250 [00:00<00:02, 88.49it/s, loss=16.801/16.907]

epoch:1


100%|██████████| 250/250 [00:02<00:00, 120.42it/s, loss=16.265/16.056]


16.264874819183344


  4%|▍         | 11/250 [00:00<00:02, 106.76it/s, loss=15.831/16.197]

epoch:2


100%|██████████| 250/250 [00:02<00:00, 122.37it/s, loss=15.345/15.008]

15.344906060028077



  4%|▍         | 11/250 [00:00<00:02, 107.55it/s, loss=14.894/14.869]

epoch:3


100%|██████████| 250/250 [00:02<00:00, 122.46it/s, loss=14.657/14.585]

14.656910390014644



  4%|▍         | 10/250 [00:00<00:02, 98.10it/s, loss=14.348/14.543]

epoch:4


100%|██████████| 250/250 [00:02<00:00, 123.05it/s, loss=14.078/13.595]


14.078325708770754


  4%|▍         | 10/250 [00:00<00:02, 97.63it/s, loss=13.747/13.791]

epoch:5


100%|██████████| 250/250 [00:01<00:00, 125.52it/s, loss=13.568/13.346]

13.568329654922492



  4%|▎         | 9/250 [00:00<00:03, 71.76it/s, loss=13.257/13.657]

epoch:6


100%|██████████| 250/250 [00:02<00:00, 116.40it/s, loss=13.107/12.645]

13.106677808151247



  4%|▍         | 11/250 [00:00<00:02, 93.09it/s, loss=12.981/12.710]

epoch:7


100%|██████████| 250/250 [00:02<00:00, 124.88it/s, loss=12.681/12.771]

12.680672839736939



  4%|▎         | 9/250 [00:00<00:02, 86.41it/s, loss=12.438/12.337]

epoch:8


100%|██████████| 250/250 [00:02<00:00, 110.51it/s, loss=12.288/12.574]

12.288044298324586



  5%|▍         | 12/250 [00:00<00:02, 113.87it/s, loss=12.208/12.608]

epoch:9


100%|██████████| 250/250 [00:01<00:00, 131.00it/s, loss=11.921/11.704]

11.920953945693974



  5%|▍         | 12/250 [00:00<00:02, 117.24it/s, loss=11.755/11.420]

epoch:10


100%|██████████| 250/250 [00:02<00:00, 121.77it/s, loss=11.577/11.112]


11.57719394111633


  4%|▎         | 9/250 [00:00<00:02, 89.99it/s, loss=11.371/11.632]

epoch:11


100%|██████████| 250/250 [00:02<00:00, 121.24it/s, loss=11.256/11.013]

11.256006814804067



  4%|▍         | 11/250 [00:00<00:02, 107.26it/s, loss=11.006/11.073]

epoch:12


100%|██████████| 250/250 [00:01<00:00, 126.68it/s, loss=10.952/11.114]

10.95241332939149



  4%|▍         | 11/250 [00:00<00:02, 109.31it/s, loss=10.834/10.944]

epoch:13


100%|██████████| 250/250 [00:01<00:00, 125.73it/s, loss=10.667/11.019]


10.66747649291992


  4%|▍         | 11/250 [00:00<00:02, 108.94it/s, loss=10.531/10.457]

epoch:14


100%|██████████| 250/250 [00:01<00:00, 127.37it/s, loss=10.398/10.002]

10.398258729782105



  4%|▍         | 10/250 [00:00<00:02, 99.77it/s, loss=10.276/10.198]

epoch:15


100%|██████████| 250/250 [00:01<00:00, 125.42it/s, loss=10.145/10.157]


10.145263962631232


  4%|▍         | 11/250 [00:00<00:02, 107.42it/s, loss=10.026/9.484]

epoch:16


100%|██████████| 250/250 [00:01<00:00, 126.34it/s, loss=9.905/10.262]

9.905422025985715



  4%|▍         | 11/250 [00:00<00:02, 108.95it/s, loss=9.745/9.773]

epoch:17


100%|██████████| 250/250 [00:01<00:00, 126.22it/s, loss=9.678/9.836]

9.677787039260858



  4%|▎         | 9/250 [00:00<00:02, 87.40it/s, loss=9.536/9.413]

epoch:18


100%|██████████| 250/250 [00:02<00:00, 122.75it/s, loss=9.463/9.163]

9.46324236679077



  4%|▍         | 10/250 [00:00<00:02, 96.94it/s, loss=9.258/9.117]

epoch:19


100%|██████████| 250/250 [00:02<00:00, 119.26it/s, loss=9.261/9.894]


9.261266297454835


  4%|▍         | 11/250 [00:00<00:02, 109.35it/s, loss=9.274/9.618]

epoch:20


100%|██████████| 250/250 [00:02<00:00, 115.35it/s, loss=9.067/8.923]

9.066704388237



  4%|▎         | 9/250 [00:00<00:02, 88.56it/s, loss=8.884/8.812]

epoch:21


100%|██████████| 250/250 [00:02<00:00, 115.36it/s, loss=8.884/8.655]

8.883668427467347



  2%|▏         | 5/250 [00:00<00:04, 49.02it/s, loss=8.748/8.613]

epoch:22


100%|██████████| 250/250 [00:02<00:00, 122.14it/s, loss=8.712/8.761]

8.71202649452209



  4%|▎         | 9/250 [00:00<00:02, 85.56it/s, loss=8.669/8.744]

epoch:23


100%|██████████| 250/250 [00:02<00:00, 123.62it/s, loss=8.548/8.639]

8.547614670486448



  4%|▍         | 10/250 [00:00<00:02, 85.56it/s, loss=8.419/8.310]

epoch:24


100%|██████████| 250/250 [00:01<00:00, 127.59it/s, loss=8.391/8.394]


8.391333829269408


100%|██████████| 50/50 [00:00<00:00, 121.01it/s]

lr:0.0001; reg:10; acc:0.3482



  4%|▍         | 10/250 [00:00<00:02, 94.76it/s, loss=19.268/19.456]

epoch:0


100%|██████████| 250/250 [00:02<00:00, 120.07it/s, loss=17.736/16.500]


17.735602826538077


  4%|▎         | 9/250 [00:00<00:02, 87.85it/s, loss=16.710/16.280]

epoch:1


100%|██████████| 250/250 [00:02<00:00, 118.70it/s, loss=16.193/15.716]

16.19252333602906



  5%|▍         | 12/250 [00:00<00:02, 116.87it/s, loss=15.617/15.325]

epoch:2


100%|██████████| 250/250 [00:02<00:00, 119.97it/s, loss=15.272/15.205]

15.271585215911863



  5%|▍         | 12/250 [00:00<00:02, 116.92it/s, loss=14.963/14.330]

epoch:3


100%|██████████| 250/250 [00:02<00:00, 127.01it/s, loss=14.581/14.673]


14.580661535339356


  4%|▎         | 9/250 [00:00<00:02, 85.44it/s, loss=14.297/14.252]

epoch:4


100%|██████████| 250/250 [00:02<00:00, 122.39it/s, loss=14.004/14.368]


14.004395192108145


  4%|▍         | 11/250 [00:00<00:02, 107.62it/s, loss=13.664/13.573]

epoch:5


100%|██████████| 250/250 [00:02<00:00, 122.02it/s, loss=13.495/13.351]


13.495178897857672


  4%|▍         | 10/250 [00:00<00:02, 98.18it/s, loss=13.199/13.458]

epoch:6


100%|██████████| 250/250 [00:02<00:00, 119.97it/s, loss=13.036/12.531]


13.036387340011597


  4%|▎         | 9/250 [00:00<00:02, 85.43it/s, loss=12.858/12.910]

epoch:7


100%|██████████| 250/250 [00:02<00:00, 122.77it/s, loss=12.615/12.425]

12.615405738449098



  5%|▍         | 12/250 [00:00<00:02, 118.57it/s, loss=12.373/12.470]

epoch:8


100%|██████████| 250/250 [00:01<00:00, 128.47it/s, loss=12.226/11.837]

12.225703174285899



  4%|▎         | 9/250 [00:00<00:02, 87.94it/s, loss=11.922/12.030]

epoch:9


100%|██████████| 250/250 [00:02<00:00, 124.53it/s, loss=11.862/12.350]

11.862044576797478



  4%|▍         | 10/250 [00:00<00:02, 95.04it/s, loss=11.598/11.568]

epoch:10


100%|██████████| 250/250 [00:02<00:00, 121.28it/s, loss=11.523/11.413]

11.523047569885252



  4%|▍         | 11/250 [00:00<00:02, 105.83it/s, loss=11.406/11.052]

epoch:11


100%|██████████| 250/250 [00:01<00:00, 128.33it/s, loss=11.206/11.053]

11.205937382659917



  4%|▎         | 9/250 [00:00<00:02, 89.88it/s, loss=11.074/11.312]

epoch:12


100%|██████████| 250/250 [00:01<00:00, 128.49it/s, loss=10.905/10.555]

10.90539678237915



  5%|▍         | 12/250 [00:00<00:01, 119.16it/s, loss=10.667/10.868]

epoch:13


100%|██████████| 250/250 [00:01<00:00, 127.79it/s, loss=10.624/10.932]

10.623878453445432



  4%|▍         | 11/250 [00:00<00:02, 105.62it/s, loss=10.583/10.111]

epoch:14


100%|██████████| 250/250 [00:01<00:00, 125.61it/s, loss=10.358/10.212]

10.35767799278259



  4%|▍         | 10/250 [00:00<00:02, 90.63it/s, loss=10.249/10.728]

epoch:15


100%|██████████| 250/250 [00:02<00:00, 123.52it/s, loss=10.106/10.406]

10.106104180831906



  4%|▎         | 9/250 [00:00<00:02, 80.59it/s, loss=9.949/10.417]

epoch:16


100%|██████████| 250/250 [00:01<00:00, 127.04it/s, loss=9.869/9.804]

9.869335813140868



  5%|▍         | 12/250 [00:00<00:02, 116.46it/s, loss=9.835/9.632]

epoch:17


100%|██████████| 250/250 [00:01<00:00, 126.73it/s, loss=9.643/9.106]


9.643426381988531


  4%|▍         | 11/250 [00:00<00:02, 108.10it/s, loss=9.630/9.182]

epoch:18


100%|██████████| 250/250 [00:02<00:00, 122.51it/s, loss=9.431/9.656]

9.430701058654781



  4%|▎         | 9/250 [00:00<00:02, 89.67it/s, loss=9.280/8.947]

epoch:19


100%|██████████| 250/250 [00:02<00:00, 120.09it/s, loss=9.230/9.189]

9.229888857421871



  4%|▍         | 10/250 [00:00<00:02, 98.01it/s, loss=9.043/9.173]

epoch:20


100%|██████████| 250/250 [00:02<00:00, 122.80it/s, loss=9.039/8.832]


9.038958686256414


  4%|▍         | 10/250 [00:00<00:02, 99.60it/s, loss=8.960/9.083]

epoch:21


100%|██████████| 250/250 [00:02<00:00, 120.11it/s, loss=8.858/8.967]

8.858356977462769



  4%|▍         | 10/250 [00:00<00:02, 98.68it/s, loss=8.659/8.413]

epoch:22


100%|██████████| 250/250 [00:02<00:00, 120.69it/s, loss=8.686/8.486]

8.686308937492369



  5%|▍         | 12/250 [00:00<00:02, 116.14it/s, loss=8.556/8.694]

epoch:23


100%|██████████| 250/250 [00:02<00:00, 124.15it/s, loss=8.523/8.240]


8.523461324348446


  4%|▍         | 10/250 [00:00<00:02, 97.23it/s, loss=8.400/8.431]

epoch:24


100%|██████████| 250/250 [00:02<00:00, 124.92it/s, loss=8.369/8.124]

8.368876761970522



100%|██████████| 50/50 [00:00<00:00, 127.89it/s]


lr:0.0001; reg:100; acc:0.347


  4%|▍         | 11/250 [00:00<00:02, 95.60it/s, loss=19.823/19.920]

epoch:0


100%|██████████| 250/250 [00:01<00:00, 126.32it/s, loss=18.198/16.923]


18.198355251464836


  4%|▎         | 9/250 [00:00<00:02, 86.93it/s, loss=16.989/17.022]

epoch:1


100%|██████████| 250/250 [00:02<00:00, 121.53it/s, loss=16.326/15.657]


16.32638368881225


  4%|▍         | 11/250 [00:00<00:02, 105.38it/s, loss=15.714/15.622]

epoch:2


100%|██████████| 250/250 [00:02<00:00, 121.80it/s, loss=15.296/14.840]


15.296421828765867


  4%|▍         | 11/250 [00:00<00:02, 108.19it/s, loss=14.894/14.801]

epoch:3


100%|██████████| 250/250 [00:02<00:00, 119.96it/s, loss=14.598/14.031]

14.597937492370608



  5%|▍         | 12/250 [00:00<00:02, 115.42it/s, loss=14.178/13.823]

epoch:4


100%|██████████| 250/250 [00:02<00:00, 120.99it/s, loss=14.026/13.816]


14.026244374313352


  5%|▍         | 12/250 [00:00<00:02, 115.79it/s, loss=13.733/13.506]

epoch:5


100%|██████████| 250/250 [00:02<00:00, 122.83it/s, loss=13.524/12.942]

13.523635454635626



  4%|▍         | 10/250 [00:00<00:02, 99.10it/s, loss=13.247/13.140]

epoch:6


100%|██████████| 250/250 [00:02<00:00, 118.42it/s, loss=13.068/12.490]

13.06751558227539



  4%|▍         | 10/250 [00:00<00:02, 95.99it/s, loss=12.775/12.509]

epoch:7


100%|██████████| 250/250 [00:02<00:00, 121.57it/s, loss=12.651/12.007]

12.650522898635867



  4%|▎         | 9/250 [00:00<00:02, 88.84it/s, loss=12.424/12.579]

epoch:8


100%|██████████| 250/250 [00:02<00:00, 123.86it/s, loss=12.261/12.256]


12.261276963195805


  4%|▍         | 11/250 [00:00<00:02, 108.34it/s, loss=12.211/12.348]

epoch:9


100%|██████████| 250/250 [00:01<00:00, 126.37it/s, loss=11.899/11.304]


11.89887530326843


  4%|▍         | 11/250 [00:00<00:02, 107.53it/s, loss=11.662/11.302]

epoch:10


100%|██████████| 250/250 [00:02<00:00, 122.31it/s, loss=11.560/11.589]


11.559765959167477


  4%|▍         | 10/250 [00:00<00:02, 97.53it/s, loss=11.314/11.647]

epoch:11


100%|██████████| 250/250 [00:02<00:00, 132.93it/s, loss=11.241/10.802]

11.241102325668331



  4%|▍         | 10/250 [00:00<00:02, 99.41it/s, loss=11.180/11.174]

epoch:12


100%|██████████| 250/250 [00:02<00:00, 115.81it/s, loss=10.941/11.020]

10.941270544967658



  4%|▍         | 11/250 [00:00<00:02, 108.49it/s, loss=10.785/10.746]

epoch:13


100%|██████████| 250/250 [00:01<00:00, 125.37it/s, loss=10.658/10.603]

10.658037485504149



  4%|▍         | 10/250 [00:00<00:02, 98.55it/s, loss=10.555/10.409]

epoch:14


100%|██████████| 250/250 [00:02<00:00, 123.36it/s, loss=10.390/9.983]


10.39027307785034


  4%|▍         | 10/250 [00:00<00:02, 99.19it/s, loss=10.259/10.094]

epoch:15


100%|██████████| 250/250 [00:02<00:00, 121.67it/s, loss=10.139/10.453]

10.138864883041384



  4%|▎         | 9/250 [00:00<00:02, 89.67it/s, loss=9.928/9.519] 

epoch:16


100%|██████████| 250/250 [00:02<00:00, 119.06it/s, loss=9.899/10.383]

9.89937598014831



  4%|▎         | 9/250 [00:00<00:02, 89.25it/s, loss=9.794/10.088]

epoch:17


100%|██████████| 250/250 [00:02<00:00, 123.00it/s, loss=9.674/9.322]


9.6736373890686


  4%|▍         | 10/250 [00:00<00:02, 95.61it/s, loss=9.462/9.467]

epoch:18


100%|██████████| 250/250 [00:02<00:00, 121.16it/s, loss=9.461/9.286]

9.460572915954582



  5%|▍         | 12/250 [00:00<00:02, 112.75it/s, loss=9.345/9.305]

epoch:19


100%|██████████| 250/250 [00:02<00:00, 120.72it/s, loss=9.257/9.380]

9.257415231361383



  4%|▎         | 9/250 [00:00<00:02, 87.36it/s, loss=9.214/9.175]

epoch:20


100%|██████████| 250/250 [00:02<00:00, 123.21it/s, loss=9.065/9.175]


9.064605998725892


  4%|▍         | 10/250 [00:00<00:02, 93.83it/s, loss=8.992/8.949]

epoch:21


100%|██████████| 250/250 [00:01<00:00, 125.65it/s, loss=8.884/8.729]


8.883981564712528


  4%|▍         | 11/250 [00:00<00:02, 105.20it/s, loss=8.756/8.894]

epoch:22


100%|██████████| 250/250 [00:02<00:00, 123.83it/s, loss=8.709/8.510]

8.709008979644771



  4%|▍         | 10/250 [00:00<00:02, 93.83it/s, loss=8.569/8.926]

epoch:23


100%|██████████| 250/250 [00:01<00:00, 126.09it/s, loss=8.546/8.408]

8.546164699440004



  4%|▍         | 10/250 [00:00<00:02, 94.52it/s, loss=8.256/8.418]

epoch:24


100%|██████████| 250/250 [00:02<00:00, 122.09it/s, loss=8.392/8.306]


8.391663141860963


100%|██████████| 50/50 [00:00<00:00, 121.94it/s]


lr:0.0001; reg:1000; acc:0.3518


  4%|▎         | 9/250 [00:00<00:03, 78.11it/s, loss=19.525/18.908]

epoch:0


100%|██████████| 250/250 [00:02<00:00, 120.39it/s, loss=17.974/17.004]

17.974262045135497



  4%|▍         | 10/250 [00:00<00:02, 96.57it/s, loss=16.911/17.401]

epoch:1


100%|██████████| 250/250 [00:02<00:00, 122.20it/s, loss=16.243/15.909]

16.2432989012146



  4%|▍         | 10/250 [00:00<00:02, 96.17it/s, loss=15.563/15.574]

epoch:2


100%|██████████| 250/250 [00:02<00:00, 123.06it/s, loss=15.251/15.326]


15.250792391662587


  4%|▍         | 10/250 [00:00<00:02, 98.95it/s, loss=14.820/14.918]

epoch:3


100%|██████████| 250/250 [00:02<00:00, 120.96it/s, loss=14.547/14.399]

14.547077417297356



  4%|▎         | 9/250 [00:00<00:02, 88.50it/s, loss=14.154/13.922]

epoch:4


100%|██████████| 250/250 [00:02<00:00, 121.41it/s, loss=13.966/13.662]

13.96606660415649



  4%|▍         | 10/250 [00:00<00:02, 95.92it/s, loss=13.706/13.576]

epoch:5


100%|██████████| 250/250 [00:02<00:00, 119.01it/s, loss=13.458/12.793]


13.457938981933596


  4%|▎         | 9/250 [00:00<00:02, 85.13it/s, loss=13.108/12.695]

epoch:6


100%|██████████| 250/250 [00:02<00:00, 121.27it/s, loss=13.000/13.040]

13.00031515823364



  4%|▎         | 9/250 [00:00<00:02, 89.43it/s, loss=12.625/12.241]

epoch:7


100%|██████████| 250/250 [00:02<00:00, 122.24it/s, loss=12.579/12.227]

12.579394332275387



  4%|▎         | 9/250 [00:00<00:02, 88.41it/s, loss=12.310/12.357]

epoch:8


100%|██████████| 250/250 [00:01<00:00, 127.05it/s, loss=12.190/12.128]

12.189992380599977



  4%|▍         | 10/250 [00:00<00:02, 99.24it/s, loss=11.957/11.566]

epoch:9


100%|██████████| 250/250 [00:01<00:00, 125.04it/s, loss=11.827/11.591]

11.827154280090337



  5%|▍         | 12/250 [00:00<00:02, 116.72it/s, loss=11.585/11.478]

epoch:10


100%|██████████| 250/250 [00:02<00:00, 122.11it/s, loss=11.489/11.638]

11.489032966613765



  5%|▍         | 12/250 [00:00<00:02, 111.86it/s, loss=11.318/11.229]

epoch:11


100%|██████████| 250/250 [00:02<00:00, 122.58it/s, loss=11.172/10.950]

11.17229689201355



  4%|▍         | 10/250 [00:00<00:02, 98.09it/s, loss=11.103/11.029]

epoch:12


100%|██████████| 250/250 [00:02<00:00, 122.73it/s, loss=10.874/10.635]


10.874407013778677


  4%|▍         | 11/250 [00:00<00:02, 108.89it/s, loss=10.696/10.773]

epoch:13


100%|██████████| 250/250 [00:02<00:00, 120.40it/s, loss=10.593/10.578]


10.59329542762757


  4%|▍         | 10/250 [00:00<00:02, 97.64it/s, loss=10.567/10.595]

epoch:14


100%|██████████| 250/250 [00:02<00:00, 123.86it/s, loss=10.329/10.483]

10.328595769348153



  4%|▎         | 9/250 [00:00<00:02, 88.94it/s, loss=10.296/10.386]

epoch:15


100%|██████████| 250/250 [00:01<00:00, 125.15it/s, loss=10.077/9.914]

10.07739082061768



  4%|▍         | 10/250 [00:00<00:02, 99.27it/s, loss=9.936/9.758]

epoch:16


100%|██████████| 250/250 [00:01<00:00, 125.17it/s, loss=9.842/9.575]

9.842236241683965



  4%|▍         | 10/250 [00:00<00:02, 92.86it/s, loss=9.720/9.463]

epoch:17


100%|██████████| 250/250 [00:02<00:00, 123.78it/s, loss=9.619/9.380]


9.618882181396483


  5%|▍         | 12/250 [00:00<00:02, 114.81it/s, loss=9.437/9.206]

epoch:18


100%|██████████| 250/250 [00:02<00:00, 124.45it/s, loss=9.406/9.874]

9.406325123062134



  4%|▍         | 10/250 [00:00<00:02, 96.69it/s, loss=9.244/9.093]

epoch:19


100%|██████████| 250/250 [00:02<00:00, 120.73it/s, loss=9.206/9.345]


9.20555827823639


  4%|▎         | 9/250 [00:00<00:02, 88.23it/s, loss=9.163/9.453]

epoch:20


100%|██████████| 250/250 [00:01<00:00, 125.80it/s, loss=9.016/8.919]

9.015846453056339



  4%|▎         | 9/250 [00:00<00:02, 85.34it/s, loss=9.003/8.751]

epoch:21


100%|██████████| 250/250 [00:02<00:00, 122.85it/s, loss=8.835/9.079]

8.835441557998658



  5%|▍         | 12/250 [00:00<00:02, 115.24it/s, loss=8.796/8.846]

epoch:22


100%|██████████| 250/250 [00:02<00:00, 124.71it/s, loss=8.666/8.393]

8.665594764442455



  4%|▍         | 11/250 [00:00<00:02, 106.18it/s, loss=8.491/8.489]

epoch:23


100%|██████████| 250/250 [00:02<00:00, 121.10it/s, loss=8.502/8.268]

8.501742654533391



  5%|▍         | 12/250 [00:00<00:02, 118.33it/s, loss=8.384/8.640]

epoch:24


100%|██████████| 250/250 [00:02<00:00, 123.30it/s, loss=8.350/8.399]


8.34988468917847


100%|██████████| 50/50 [00:00<00:00, 113.11it/s]

lr:1e-05; reg:0; acc:0.3525



  4%|▎         | 9/250 [00:00<00:02, 86.52it/s, loss=19.783/20.194]

epoch:0


100%|██████████| 250/250 [00:02<00:00, 121.08it/s, loss=18.045/17.428]

18.045271944122323



  5%|▍         | 12/250 [00:00<00:02, 114.60it/s, loss=17.038/16.737]

epoch:1


100%|██████████| 250/250 [00:01<00:00, 126.27it/s, loss=16.420/16.377]

16.420397151489254



  4%|▎         | 9/250 [00:00<00:02, 83.67it/s, loss=15.821/15.735]

epoch:2


100%|██████████| 250/250 [00:02<00:00, 115.97it/s, loss=15.418/15.172]


15.417640482330313


  4%|▍         | 10/250 [00:00<00:02, 98.36it/s, loss=14.994/14.902]

epoch:3


100%|██████████| 250/250 [00:01<00:00, 129.22it/s, loss=14.691/14.541]


14.69065386764526


  4%|▎         | 9/250 [00:00<00:02, 84.98it/s, loss=14.193/14.295]

epoch:4


100%|██████████| 250/250 [00:02<00:00, 122.69it/s, loss=14.094/13.949]

14.093686509704598



  4%|▍         | 10/250 [00:00<00:02, 94.65it/s, loss=13.814/14.004]

epoch:5


100%|██████████| 250/250 [00:02<00:00, 122.81it/s, loss=13.575/13.686]

13.575169991607668



  5%|▍         | 12/250 [00:00<00:02, 116.79it/s, loss=13.466/13.841]

epoch:6


100%|██████████| 250/250 [00:02<00:00, 119.55it/s, loss=13.107/12.927]

13.107002156448369



  5%|▍         | 12/250 [00:00<00:02, 112.36it/s, loss=12.948/13.015]

epoch:7


100%|██████████| 250/250 [00:02<00:00, 120.74it/s, loss=12.682/12.658]

12.681755866394049



  4%|▍         | 10/250 [00:00<00:02, 99.10it/s, loss=12.363/12.146]

epoch:8


100%|██████████| 250/250 [00:02<00:00, 121.20it/s, loss=12.286/12.008]

12.286451157684324



  4%|▍         | 10/250 [00:00<00:02, 99.57it/s, loss=11.895/12.255]

epoch:9


100%|██████████| 250/250 [00:02<00:00, 124.67it/s, loss=11.919/11.882]

11.919244855270387



  4%|▍         | 10/250 [00:00<00:02, 96.50it/s, loss=11.665/11.952]

epoch:10


100%|██████████| 250/250 [00:02<00:00, 121.90it/s, loss=11.575/11.232]


11.575330968399054


  4%|▎         | 9/250 [00:00<00:02, 87.69it/s, loss=11.364/11.080]

epoch:11


100%|██████████| 250/250 [00:02<00:00, 122.08it/s, loss=11.254/11.030]

11.25403520645141



  4%|▍         | 11/250 [00:00<00:02, 105.99it/s, loss=11.070/11.079]

epoch:12


100%|██████████| 250/250 [00:02<00:00, 120.87it/s, loss=10.951/10.818]


10.950599332275383


  5%|▍         | 12/250 [00:00<00:02, 115.87it/s, loss=10.859/11.054]

epoch:13


100%|██████████| 250/250 [00:01<00:00, 125.61it/s, loss=10.666/10.500]


10.665974283905033


  4%|▍         | 11/250 [00:00<00:02, 108.35it/s, loss=10.497/10.982]

epoch:14


100%|██████████| 250/250 [00:02<00:00, 126.94it/s, loss=10.398/10.094]


10.39792682792664


  4%|▍         | 10/250 [00:00<00:02, 96.99it/s, loss=10.261/10.405]

epoch:15


100%|██████████| 250/250 [00:01<00:00, 126.92it/s, loss=10.143/9.855]

10.1433008946228



  4%|▍         | 11/250 [00:00<00:02, 106.83it/s, loss=9.895/9.879]

epoch:16


100%|██████████| 250/250 [00:02<00:00, 122.06it/s, loss=9.903/10.135]

9.903437608413697



  4%|▍         | 11/250 [00:00<00:02, 109.90it/s, loss=9.969/10.008]

epoch:17


100%|██████████| 250/250 [00:02<00:00, 122.27it/s, loss=9.677/9.363]

9.676524515838627



  4%|▍         | 11/250 [00:00<00:02, 105.84it/s, loss=9.407/9.244]

epoch:18


100%|██████████| 250/250 [00:02<00:00, 121.40it/s, loss=9.462/9.030]


9.46193714782715


  4%|▎         | 9/250 [00:00<00:02, 88.73it/s, loss=9.350/9.236]

epoch:19


100%|██████████| 250/250 [00:01<00:00, 126.57it/s, loss=9.259/9.439]


9.25918408191681


  4%|▍         | 11/250 [00:00<00:02, 105.33it/s, loss=9.121/9.043]

epoch:20


100%|██████████| 250/250 [00:02<00:00, 117.70it/s, loss=9.064/8.491]

9.064210722541809



  4%|▍         | 10/250 [00:00<00:02, 99.05it/s, loss=8.984/9.067]

epoch:21


100%|██████████| 250/250 [00:02<00:00, 120.10it/s, loss=8.884/8.477]


8.88382139232635


  4%|▍         | 11/250 [00:00<00:02, 109.94it/s, loss=8.822/8.719]

epoch:22


100%|██████████| 250/250 [00:02<00:00, 119.98it/s, loss=8.711/8.227]

8.710953771667482



  4%|▍         | 10/250 [00:00<00:02, 96.00it/s, loss=8.559/8.231]

epoch:23


100%|██████████| 250/250 [00:02<00:00, 120.91it/s, loss=8.547/8.569]


8.547316085166932


  4%|▍         | 10/250 [00:00<00:02, 95.55it/s, loss=8.410/8.460]

epoch:24


100%|██████████| 250/250 [00:01<00:00, 125.79it/s, loss=8.390/8.273]


8.390034563941958


100%|██████████| 50/50 [00:00<00:00, 121.47it/s]


lr:1e-05; reg:1; acc:0.3516


  4%|▎         | 9/250 [00:00<00:02, 86.12it/s, loss=19.765/19.497]

epoch:0


100%|██████████| 250/250 [00:01<00:00, 128.09it/s, loss=18.137/16.791]

18.136583413543708



  4%|▍         | 11/250 [00:00<00:02, 107.99it/s, loss=16.962/16.851]

epoch:1


100%|██████████| 250/250 [00:01<00:00, 125.54it/s, loss=16.432/15.662]

16.43188051269532



  4%|▎         | 9/250 [00:00<00:02, 89.93it/s, loss=15.854/15.652]

epoch:2


100%|██████████| 250/250 [00:02<00:00, 122.13it/s, loss=15.434/15.002]


15.433541243896478


  4%|▍         | 11/250 [00:00<00:02, 107.02it/s, loss=15.140/14.770]

epoch:3


100%|██████████| 250/250 [00:01<00:00, 126.81it/s, loss=14.717/14.609]

14.716808913726807



  4%|▍         | 10/250 [00:00<00:02, 98.54it/s, loss=14.442/14.433]

epoch:4


100%|██████████| 250/250 [00:02<00:00, 122.79it/s, loss=14.128/13.995]


14.128223860702507


  4%|▍         | 10/250 [00:00<00:02, 93.62it/s, loss=13.787/14.039]

epoch:5


100%|██████████| 250/250 [00:02<00:00, 124.75it/s, loss=13.609/13.293]

13.60905545394897



  4%|▍         | 11/250 [00:00<00:02, 109.29it/s, loss=13.287/13.572]

epoch:6


100%|██████████| 250/250 [00:02<00:00, 119.55it/s, loss=13.144/13.220]

13.143519874343882



  4%|▍         | 11/250 [00:00<00:02, 106.54it/s, loss=12.915/13.062]

epoch:7


100%|██████████| 250/250 [00:02<00:00, 123.04it/s, loss=12.713/12.515]

12.713452345809932



  4%|▍         | 10/250 [00:00<00:02, 97.91it/s, loss=12.456/12.207]

epoch:8


100%|██████████| 250/250 [00:02<00:00, 120.58it/s, loss=12.317/11.607]

12.317277776794437



  4%|▍         | 10/250 [00:00<00:02, 99.86it/s, loss=12.079/11.994]

epoch:9


100%|██████████| 250/250 [00:02<00:00, 124.36it/s, loss=11.949/12.266]


11.949345539932251


  4%|▍         | 11/250 [00:00<00:02, 107.24it/s, loss=11.832/11.717]

epoch:10


100%|██████████| 250/250 [00:02<00:00, 124.48it/s, loss=11.604/11.535]


11.60430679260254


  4%|▎         | 9/250 [00:00<00:02, 89.48it/s, loss=11.486/11.243]

epoch:11


100%|██████████| 250/250 [00:02<00:00, 124.25it/s, loss=11.281/10.915]

11.28103151245118



  4%|▎         | 9/250 [00:00<00:02, 88.78it/s, loss=11.031/11.292]

epoch:12


100%|██████████| 250/250 [00:02<00:00, 119.95it/s, loss=10.976/10.676]

10.976299421463018



  4%|▍         | 10/250 [00:00<00:02, 97.68it/s, loss=10.791/11.459]

epoch:13


100%|██████████| 250/250 [00:02<00:00, 119.71it/s, loss=10.691/10.924]

10.691312264633178



  4%|▎         | 9/250 [00:00<00:03, 72.32it/s, loss=10.396/10.476]

epoch:14


100%|██████████| 250/250 [00:02<00:00, 121.49it/s, loss=10.421/10.206]

10.421123161773679



  4%|▍         | 10/250 [00:00<00:02, 94.75it/s, loss=10.325/10.139]

epoch:15


100%|██████████| 250/250 [00:02<00:00, 122.64it/s, loss=10.166/10.095]

10.166080602416994



  5%|▍         | 12/250 [00:00<00:02, 114.46it/s, loss=10.084/9.800]

epoch:16


100%|██████████| 250/250 [00:02<00:00, 122.70it/s, loss=9.926/10.476]

9.925587355270382



  4%|▍         | 10/250 [00:00<00:02, 98.52it/s, loss=9.919/10.065]

epoch:17


100%|██████████| 250/250 [00:02<00:00, 118.94it/s, loss=9.698/9.307]

9.698030271987918



  5%|▍         | 12/250 [00:00<00:02, 113.56it/s, loss=9.491/9.207]

epoch:18


100%|██████████| 250/250 [00:01<00:00, 126.53it/s, loss=9.481/9.536]

9.48103073791503



  4%|▍         | 10/250 [00:00<00:02, 99.52it/s, loss=9.301/8.991]

epoch:19


100%|██████████| 250/250 [00:02<00:00, 124.06it/s, loss=9.278/8.916]


9.277792062721254


  4%|▍         | 10/250 [00:00<00:02, 97.75it/s, loss=9.318/9.516]

epoch:20


100%|██████████| 250/250 [00:02<00:00, 124.56it/s, loss=9.084/9.096]

9.083751163291936



  4%|▍         | 11/250 [00:00<00:02, 106.75it/s, loss=9.033/8.748]

epoch:21


100%|██████████| 250/250 [00:01<00:00, 127.22it/s, loss=8.901/8.855]

8.900637477149964



  4%|▎         | 9/250 [00:00<00:02, 89.53it/s, loss=8.780/8.561]

epoch:22


100%|██████████| 250/250 [00:02<00:00, 120.69it/s, loss=8.726/8.562]

8.726460897178649



  4%|▎         | 9/250 [00:00<00:02, 89.37it/s, loss=8.681/9.006]

epoch:23


100%|██████████| 250/250 [00:01<00:00, 125.93it/s, loss=8.560/8.586]

8.560233781890874



  4%|▍         | 10/250 [00:00<00:02, 98.47it/s, loss=8.552/8.561]

epoch:24


100%|██████████| 250/250 [00:01<00:00, 126.65it/s, loss=8.405/8.145]


8.405145533981328


100%|██████████| 50/50 [00:00<00:00, 124.49it/s]

lr:1e-05; reg:10; acc:0.3458



  4%|▍         | 10/250 [00:00<00:02, 96.58it/s, loss=19.577/19.411]

epoch:0


100%|██████████| 250/250 [00:01<00:00, 126.84it/s, loss=18.361/17.002]

18.361432312469468



  4%|▍         | 10/250 [00:00<00:02, 97.29it/s, loss=17.323/17.297]

epoch:1


100%|██████████| 250/250 [00:02<00:00, 121.95it/s, loss=16.591/15.852]

16.591093897247312



  4%|▍         | 10/250 [00:00<00:02, 96.09it/s, loss=15.949/15.992]

epoch:2


 14%|█▍        | 36/250 [00:00<00:01, 109.76it/s, loss=15.934/15.775]