<a href="https://colab.research.google.com/github/ArshT/Pytorch_Practice/blob/master/MNIST_HP_Tuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install optuna

Collecting optuna
[?25l  Downloading https://files.pythonhosted.org/packages/2b/21/d13081805e1e1afc71f5bb743ece324c8bd576237c51b899ecb38a717502/optuna-2.7.0-py3-none-any.whl (293kB)
[K     |████████████████████████████████| 296kB 16.6MB/s 
[?25hCollecting cliff
[?25l  Downloading https://files.pythonhosted.org/packages/a2/d6/7d9acb68a77acd140be7fececb7f2701b2a29d2da9c54184cb8f93509590/cliff-3.7.0-py3-none-any.whl (80kB)
[K     |████████████████████████████████| 81kB 7.6MB/s 
Collecting alembic
[?25l  Downloading https://files.pythonhosted.org/packages/72/a4/97eb6273839655cac14947986fa7a5935350fcfd4fff872e9654264c82d8/alembic-1.5.8-py2.py3-none-any.whl (159kB)
[K     |████████████████████████████████| 163kB 26.4MB/s 
[?25hCollecting colorlog
  Downloading https://files.pythonhosted.org/packages/32/e6/e9ddc6fa1104fda718338b341e4b3dc31cd8039ab29e52fc73b508515361/colorlog-5.0.1-py2.py3-none-any.whl
Collecting cmaes>=0.8.2
  Downloading https://files.pythonhosted.org/packages/01/1f/

In [4]:
import numpy as np
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import random_split
import torchvision
import torchvision.transforms as transforms
import optuna

In [5]:
def load_data(batch_size=32, num_workers=2, root='./data'):
  """Build the MNIST training and test data loaders.

  Images are converted to tensors and normalized with mean 0.5 and std 0.5.
  The datasets are downloaded into ``root`` if they are not already there.

  Args:
    batch_size: Number of samples per batch for both loaders.
    num_workers: Number of worker processes used by each loader
      (0 loads data in the main process).
    root: Directory where the MNIST files are stored / downloaded.

  Returns:
    A ``(trainloader, testloader)`` tuple. The training loader reshuffles
    every epoch; the test loader keeps a fixed order.
  """
  # Normalize expects one mean/std entry per channel. MNIST has a single
  # channel, so pass 1-tuples ("(0.5)" without the comma is just a float).
  transform = transforms.Compose([
      transforms.ToTensor(),
      transforms.Normalize((0.5,), (0.5,)),
  ])

  trainset = torchvision.datasets.MNIST(root=root, train=True, download=True, transform=transform)
  testset = torchvision.datasets.MNIST(root=root, train=False, download=True, transform=transform)

  trainloader = torch.utils.data.DataLoader(
      trainset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
  testloader = torch.utils.data.DataLoader(
      testset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

  return trainloader, testloader

In [36]:
class Net(nn.Module):
  """Three-layer fully connected classifier for 1x28x28 images.

  Args:
    l1: Width of the first hidden layer.
    l2: Width of the second hidden layer.

  The forward pass returns raw class scores (logits) for 10 classes;
  no softmax is applied.
  """

  def __init__(self, l1=512, l2=256):
    super().__init__()

    n_inputs = 1 * 28 * 28  # flattened image size
    n_classes = 10

    # Dropout is applied only after the first hidden layer (see forward).
    self.drop = nn.Dropout(0.2)

    self.fc1 = nn.Linear(n_inputs, l1)
    self.fc2 = nn.Linear(l1, l2)
    self.fc3 = nn.Linear(l2, n_classes)

  def forward(self, x):
    """Flatten each sample to 784 features and compute the class logits."""
    batch_size = x.shape[0]
    flat = x.view(batch_size, 1 * 28 * 28)

    hidden1 = self.drop(F.relu(self.fc1(flat)))
    hidden2 = F.relu(self.fc2(hidden1))

    return self.fc3(hidden2)

In [37]:
def train(log_interval,model,dataloader,epoch,optimizer,criterion):
  """Train ``model`` for one pass over ``dataloader``.

  Args:
    log_interval: Print the mean loss of the last ``log_interval`` batches
      every ``log_interval`` batches. A value <= 0 disables logging.
    model: The ``nn.Module`` to train; it is switched to training mode.
    dataloader: Iterable yielding ``(data, target)`` batches.
    epoch: Epoch number, used only in the log line.
    optimizer: Optimizer that updates ``model``'s parameters.
    criterion: Loss callable invoked as ``criterion(output, target)``.

  Returns:
    None. The model parameters are updated in place.
  """
  # Run on whatever device the model already lives on instead of assuming
  # 'cuda:0', so the same loop works on CPU-only machines as well.
  first_param = next(model.parameters(), None)
  device = first_param.device if first_param is not None else torch.device('cpu')

  model.train()
  running_loss = 0.0
  for batch_idx, (data, target) in enumerate(dataloader):
    data = data.to(device)
    target = target.to(device)

    optimizer.zero_grad()

    output = model(data)
    loss = criterion(output,target)

    loss.backward()
    optimizer.step()

    running_loss += loss.item()

    # Guard against log_interval == 0, which would otherwise raise
    # ZeroDivisionError in the modulo below.
    if log_interval > 0 and batch_idx % log_interval == log_interval-1:
      print('[%d ,%5d] loss: %.3f' %(epoch, batch_idx + 1, running_loss / log_interval))
      running_loss = 0.0
      
def test(model, dataloader,criterion):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in dataloader:
            data = data.to(torch.device('cuda:0'))
            target = target.to(torch.device('cuda:0'))
            
            output = model(data.to('cuda:0'))
            test_loss += criterion(output,target).item()  
            pred = output.argmax(dim=1, keepdim=True) 
            correct += pred.eq(target.to('cuda:0').view_as(pred)).sum().item()

    test_loss /= len(dataloader.dataset)
    accuracy = 100. * correct / len(dataloader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(test_loss, correct, len(dataloader.dataset),100. * correct / len(dataloader.dataset)))
    
    return accuracy

In [42]:
def train_mnist(trial):
  """Optuna objective: train a ``Net`` on MNIST and return its accuracy.

  The learning rate (log scale, 1e-4 to 1e-1) and the two hidden layer
  widths (16 to 512) are sampled from ``trial``. The model is trained with
  SGD (momentum 0.95) for ``n_epochs`` epochs and evaluated after each one.

  Args:
    trial: The ``optuna`` trial used to sample hyperparameters.

  Returns:
    Accuracy in percent after the final epoch, as returned by ``test``.
  """
  # NOTE(review): the objective is measured on the test split returned by
  # load_data(), so the tuned hyperparameters are selected on test data.
  # Consider holding out a validation split of the training set instead.
  cfg = { 'device' : "cuda" if torch.cuda.is_available() else "cpu",
          'n_epochs' : 10,
          'log_interval' : 300,
          # suggest_float(..., log=True) replaces the deprecated suggest_loguniform.
          'lr' : trial.suggest_float('lr', 1e-4, 1e-1, log=True),
          'l1' : trial.suggest_int('l1',16,512),
          'l2' : trial.suggest_int('l2',16,512)}

  train_loader, test_loader = load_data()
  print(cfg['l1'],cfg['l2'],cfg['lr'])

  # Honor the device chosen above instead of unconditionally using 'cuda:0'.
  model = Net(cfg['l1'],cfg['l2']).to(cfg['device'])
  criterion = nn.CrossEntropyLoss()
  optimizer = optim.SGD(model.parameters(),lr=cfg['lr'],momentum=0.95)

  for epoch in range(1, cfg['n_epochs'] + 1):
      train(cfg['log_interval'], model, train_loader,epoch,optimizer,criterion)
      test_accuracy = test(model, test_loader,criterion)
      print(test_accuracy)

  return test_accuracy

if __name__ == '__main__':

  # Seed the sampler's own random generator. Model initialisation and data
  # shuffling are not seeded here, so objective values still vary run to run.
  sampler = optuna.samplers.TPESampler(seed=0)

  # Maximize the accuracy returned by train_mnist over 100 trials.
  study = optuna.create_study(sampler=sampler, direction='maximize')
  study.optimize(func=train_mnist, n_trials=100)
  # Optional persistence of the finished study (joblib is not imported in this notebook):
  #joblib.dump(study, '/content/gdrive/My Drive/Colab_Data/studies/mnist_optuna.pkl')

  # Surface the outcome instead of leaving it buried in the per-trial log.
  print('Best trial: accuracy {:.2f}% with params {}'.format(study.best_value, study.best_params))

[32m[I 2021-04-28 12:21:43,016][0m A new study created in memory with name: no-name-b7550949-791c-4574-82b3-5006347f5910[0m


164 91 0.0001277743765567906
[1 ,  300] loss: 2.269
[1 ,  600] loss: 2.126
[1 ,  900] loss: 1.865
[1 , 1200] loss: 1.529
[1 , 1500] loss: 1.195
[1 , 1800] loss: 0.956

Test set: Average loss: 0.0233, Accuracy: 8187/10000 (82%)

81.87
[2 ,  300] loss: 0.757
[2 ,  600] loss: 0.673
[2 ,  900] loss: 0.627
[2 , 1200] loss: 0.565
[2 , 1500] loss: 0.560
[2 , 1800] loss: 0.523

Test set: Average loss: 0.0135, Accuracy: 8832/10000 (88%)

88.32
[3 ,  300] loss: 0.485
[3 ,  600] loss: 0.458
[3 ,  900] loss: 0.451
[3 , 1200] loss: 0.457
[3 , 1500] loss: 0.432
[3 , 1800] loss: 0.427

Test set: Average loss: 0.0111, Accuracy: 8993/10000 (90%)

89.93
[4 ,  300] loss: 0.408
[4 ,  600] loss: 0.413
[4 ,  900] loss: 0.383
[4 , 1200] loss: 0.385
[4 , 1500] loss: 0.359
[4 , 1800] loss: 0.375

Test set: Average loss: 0.0099, Accuracy: 9096/10000 (91%)

90.96
[5 ,  300] loss: 0.367
[5 ,  600] loss: 0.351
[5 ,  900] loss: 0.353
[5 , 1200] loss: 0.344
[5 , 1500] loss: 0.355
[5 , 1800] loss: 0.341

Test set: Av

[32m[I 2021-04-28 12:23:47,198][0m Trial 0 finished with value: 93.96 and parameters: {'lr': 0.0001277743765567906, 'l1': 164, 'l2': 91}. Best is trial 0 with value: 93.96.[0m



Test set: Average loss: 0.0066, Accuracy: 9396/10000 (94%)

93.96
187 403 0.0003431954533929123
[1 ,  300] loss: 2.097
[1 ,  600] loss: 1.206
[1 ,  900] loss: 0.717
[1 , 1200] loss: 0.565
[1 , 1500] loss: 0.495
[1 , 1800] loss: 0.449

Test set: Average loss: 0.0112, Accuracy: 8999/10000 (90%)

89.99
[2 ,  300] loss: 0.417
[2 ,  600] loss: 0.387
[2 ,  900] loss: 0.372
[2 , 1200] loss: 0.365
[2 , 1500] loss: 0.358
[2 , 1800] loss: 0.330

Test set: Average loss: 0.0086, Accuracy: 9208/10000 (92%)

92.08
[3 ,  300] loss: 0.317
[3 ,  600] loss: 0.323
[3 ,  900] loss: 0.297
[3 , 1200] loss: 0.275
[3 , 1500] loss: 0.285
[3 , 1800] loss: 0.286

Test set: Average loss: 0.0073, Accuracy: 9315/10000 (93%)

93.15
[4 ,  300] loss: 0.261
[4 ,  600] loss: 0.261
[4 ,  900] loss: 0.259
[4 , 1200] loss: 0.265
[4 , 1500] loss: 0.243
[4 , 1800] loss: 0.240

Test set: Average loss: 0.0065, Accuracy: 9377/10000 (94%)

93.77
[5 ,  300] loss: 0.227
[5 ,  600] loss: 0.227
[5 ,  900] loss: 0.225
[5 , 1200] los

[32m[I 2021-04-28 12:25:52,485][0m Trial 1 finished with value: 96.32 and parameters: {'lr': 0.0003431954533929123, 'l1': 187, 'l2': 403}. Best is trial 1 with value: 96.32.[0m



Test set: Average loss: 0.0037, Accuracy: 9632/10000 (96%)

96.32
28 410 0.0009891861585670627
[1 ,  300] loss: 1.476
[1 ,  600] loss: 0.658
[1 ,  900] loss: 0.553
[1 , 1200] loss: 0.501
[1 , 1500] loss: 0.457
[1 , 1800] loss: 0.445

Test set: Average loss: 0.0086, Accuracy: 9166/10000 (92%)

91.66
[2 ,  300] loss: 0.415
[2 ,  600] loss: 0.395
[2 ,  900] loss: 0.360
[2 , 1200] loss: 0.400
[2 , 1500] loss: 0.370
[2 , 1800] loss: 0.368

Test set: Average loss: 0.0074, Accuracy: 9268/10000 (93%)

92.68
[3 ,  300] loss: 0.339
[3 ,  600] loss: 0.336
[3 ,  900] loss: 0.327
[3 , 1200] loss: 0.344
[3 , 1500] loss: 0.325
[3 , 1800] loss: 0.318

Test set: Average loss: 0.0069, Accuracy: 9325/10000 (93%)

93.25
[4 ,  300] loss: 0.301
[4 ,  600] loss: 0.306
[4 ,  900] loss: 0.287
[4 , 1200] loss: 0.291
[4 , 1500] loss: 0.295
[4 , 1800] loss: 0.300

Test set: Average loss: 0.0055, Accuracy: 9429/10000 (94%)

94.29
[5 ,  300] loss: 0.274
[5 ,  600] loss: 0.276
[5 ,  900] loss: 0.268
[5 , 1200] loss

[32m[I 2021-04-28 12:27:57,795][0m Trial 2 finished with value: 95.11 and parameters: {'lr': 0.0009891861585670627, 'l1': 28, 'l2': 410}. Best is trial 1 with value: 96.32.[0m



Test set: Average loss: 0.0047, Accuracy: 9511/10000 (95%)

95.11
267 94 0.0003011733606436442
[1 ,  300] loss: 2.165
[1 ,  600] loss: 1.482
[1 ,  900] loss: 0.851
[1 , 1200] loss: 0.635
[1 , 1500] loss: 0.517
[1 , 1800] loss: 0.471

Test set: Average loss: 0.0123, Accuracy: 8908/10000 (89%)

89.08
[2 ,  300] loss: 0.429
[2 ,  600] loss: 0.398
[2 ,  900] loss: 0.399
[2 , 1200] loss: 0.377
[2 , 1500] loss: 0.349
[2 , 1800] loss: 0.358

Test set: Average loss: 0.0093, Accuracy: 9154/10000 (92%)

91.54
[3 ,  300] loss: 0.334
[3 ,  600] loss: 0.321
[3 ,  900] loss: 0.327
[3 , 1200] loss: 0.315
[3 , 1500] loss: 0.314
[3 , 1800] loss: 0.287

Test set: Average loss: 0.0080, Accuracy: 9254/10000 (93%)

92.54
[4 ,  300] loss: 0.275
[4 ,  600] loss: 0.295
[4 ,  900] loss: 0.277
[4 , 1200] loss: 0.263
[4 , 1500] loss: 0.261
[4 , 1800] loss: 0.259

Test set: Average loss: 0.0071, Accuracy: 9326/10000 (93%)

93.26
[5 ,  300] loss: 0.253
[5 ,  600] loss: 0.242
[5 ,  900] loss: 0.236
[5 , 1200] loss

[32m[I 2021-04-28 12:30:03,157][0m Trial 3 finished with value: 96.17 and parameters: {'lr': 0.0003011733606436442, 'l1': 267, 'l2': 94}. Best is trial 1 with value: 96.32.[0m



Test set: Average loss: 0.0039, Accuracy: 9617/10000 (96%)

96.17
227 437 0.009320036010913016
[1 ,  300] loss: 0.810
[1 ,  600] loss: 0.439
[1 ,  900] loss: 0.380
[1 , 1200] loss: 0.340
[1 , 1500] loss: 0.309
[1 , 1800] loss: 0.283

Test set: Average loss: 0.0057, Accuracy: 9434/10000 (94%)

94.34
[2 ,  300] loss: 0.246
[2 ,  600] loss: 0.252
[2 ,  900] loss: 0.246
[2 , 1200] loss: 0.235
[2 , 1500] loss: 0.223
[2 , 1800] loss: 0.220

Test set: Average loss: 0.0043, Accuracy: 9576/10000 (96%)

95.76
[3 ,  300] loss: 0.194
[3 ,  600] loss: 0.198
[3 ,  900] loss: 0.210
[3 , 1200] loss: 0.186
[3 , 1500] loss: 0.186
[3 , 1800] loss: 0.192

Test set: Average loss: 0.0047, Accuracy: 9535/10000 (95%)

95.35
[4 ,  300] loss: 0.162
[4 ,  600] loss: 0.179
[4 ,  900] loss: 0.163
[4 , 1200] loss: 0.170
[4 , 1500] loss: 0.171
[4 , 1800] loss: 0.167

Test set: Average loss: 0.0043, Accuracy: 9591/10000 (96%)

95.91
[5 ,  300] loss: 0.152
[5 ,  600] loss: 0.145
[5 ,  900] loss: 0.145
[5 , 1200] loss

[32m[I 2021-04-28 12:32:09,093][0m Trial 4 finished with value: 97.39 and parameters: {'lr': 0.009320036010913016, 'l1': 227, 'l2': 437}. Best is trial 4 with value: 97.39.[0m



Test set: Average loss: 0.0026, Accuracy: 9739/10000 (97%)

97.39
113 130 0.042600746571131894
[1 ,  300] loss: 1.420
[1 ,  600] loss: 1.539
[1 ,  900] loss: 1.737
[1 , 1200] loss: 2.053
[1 , 1500] loss: 2.101
[1 , 1800] loss: 2.085

Test set: Average loss: 0.0647, Accuracy: 1870/10000 (19%)

18.7
[2 ,  300] loss: 2.140
[2 ,  600] loss: 2.105
[2 ,  900] loss: 2.104
[2 , 1200] loss: 2.116
[2 , 1500] loss: 2.143
[2 , 1800] loss: 2.159

Test set: Average loss: 0.0674, Accuracy: 1787/10000 (18%)

17.87
[3 ,  300] loss: 2.294
[3 ,  600] loss: 2.306
[3 ,  900] loss: 2.307
[3 , 1200] loss: 2.305
[3 , 1500] loss: 2.306
[3 , 1800] loss: 2.309

Test set: Average loss: 0.0721, Accuracy: 976/10000 (10%)

9.76
[4 ,  300] loss: 2.306
[4 ,  600] loss: 2.307
[4 ,  900] loss: 2.306
[4 , 1200] loss: 2.242
[4 , 1500] loss: 2.159
[4 , 1800] loss: 2.114

Test set: Average loss: 0.0633, Accuracy: 2238/10000 (22%)

22.38
[5 ,  300] loss: 2.133
[5 ,  600] loss: 2.104
[5 ,  900] loss: 2.115
[5 , 1200] loss: 2

[32m[I 2021-04-28 12:34:13,617][0m Trial 5 finished with value: 11.35 and parameters: {'lr': 0.042600746571131894, 'l1': 113, 'l2': 130}. Best is trial 4 with value: 97.39.[0m



Test set: Average loss: 0.0723, Accuracy: 1135/10000 (11%)

11.35
192 38 0.010999030688496914
[1 ,  300] loss: 0.878
[1 ,  600] loss: 0.487
[1 ,  900] loss: 0.471
[1 , 1200] loss: 0.437
[1 , 1500] loss: 0.399
[1 , 1800] loss: 0.365

Test set: Average loss: 0.0071, Accuracy: 9288/10000 (93%)

92.88
[2 ,  300] loss: 0.343
[2 ,  600] loss: 0.334
[2 ,  900] loss: 0.348
[2 , 1200] loss: 0.333
[2 , 1500] loss: 0.295
[2 , 1800] loss: 0.295

Test set: Average loss: 0.0067, Accuracy: 9340/10000 (93%)

93.4
[3 ,  300] loss: 0.278
[3 ,  600] loss: 0.263
[3 ,  900] loss: 0.270
[3 , 1200] loss: 0.256
[3 , 1500] loss: 0.282
[3 , 1800] loss: 0.260

Test set: Average loss: 0.0054, Accuracy: 9476/10000 (95%)

94.76
[4 ,  300] loss: 0.243
[4 ,  600] loss: 0.247
[4 ,  900] loss: 0.247
[4 , 1200] loss: 0.249
[4 , 1500] loss: 0.246
[4 , 1800] loss: 0.262

Test set: Average loss: 0.0052, Accuracy: 9485/10000 (95%)

94.85
[5 ,  300] loss: 0.222
[5 ,  600] loss: 0.213
[5 ,  900] loss: 0.249
[5 , 1200] loss: 

[32m[I 2021-04-28 12:36:19,966][0m Trial 6 finished with value: 96.04 and parameters: {'lr': 0.010999030688496914, 'l1': 192, 'l2': 38}. Best is trial 4 with value: 97.39.[0m



Test set: Average loss: 0.0044, Accuracy: 9604/10000 (96%)

96.04
167 253 0.03378221430204824
[1 ,  300] loss: 1.336
[1 ,  600] loss: 1.533
[1 ,  900] loss: 1.634
[1 , 1200] loss: 1.625
[1 , 1500] loss: 1.685
[1 , 1800] loss: 1.814

Test set: Average loss: 0.0512, Accuracy: 3293/10000 (33%)

32.93
[2 ,  300] loss: 1.757
[2 ,  600] loss: 1.842
[2 ,  900] loss: 1.906
[2 , 1200] loss: 1.904
[2 , 1500] loss: 1.874
[2 , 1800] loss: 1.883

Test set: Average loss: 0.0532, Accuracy: 2984/10000 (30%)

29.84
[3 ,  300] loss: 1.913
[3 ,  600] loss: 2.001
[3 ,  900] loss: 2.061
[3 , 1200] loss: 2.088
[3 , 1500] loss: 2.039
[3 , 1800] loss: 1.980

Test set: Average loss: 0.0571, Accuracy: 2308/10000 (23%)

23.08
[4 ,  300] loss: 1.953
[4 ,  600] loss: 1.974
[4 ,  900] loss: 1.957
[4 , 1200] loss: 2.049
[4 , 1500] loss: 1.963
[4 , 1800] loss: 2.088

Test set: Average loss: 0.0648, Accuracy: 1851/10000 (19%)

18.51
[5 ,  300] loss: 2.083
[5 ,  600] loss: 2.190
[5 ,  900] loss: 2.072
[5 , 1200] loss:

[32m[I 2021-04-28 12:38:32,128][0m Trial 7 finished with value: 10.32 and parameters: {'lr': 0.03378221430204824, 'l1': 167, 'l2': 253}. Best is trial 4 with value: 97.39.[0m



Test set: Average loss: 0.0722, Accuracy: 1032/10000 (10%)

10.32
498 119 0.023529162099618972
[1 ,  300] loss: 0.841
[1 ,  600] loss: 0.709
[1 ,  900] loss: 0.779
[1 , 1200] loss: 0.785
[1 , 1500] loss: 0.721
[1 , 1800] loss: 0.713

Test set: Average loss: 0.0204, Accuracy: 8206/10000 (82%)

82.06
[2 ,  300] loss: 0.650
[2 ,  600] loss: 0.643
[2 ,  900] loss: 0.641
[2 , 1200] loss: 0.619
[2 , 1500] loss: 0.672
[2 , 1800] loss: 0.653

Test set: Average loss: 0.0119, Accuracy: 9032/10000 (90%)

90.32
[3 ,  300] loss: 0.664
[3 ,  600] loss: 0.699
[3 ,  900] loss: 0.721
[3 , 1200] loss: 0.659
[3 , 1500] loss: 0.682
[3 , 1800] loss: 0.694

Test set: Average loss: 0.0140, Accuracy: 9001/10000 (90%)

90.01
[4 ,  300] loss: 0.749
[4 ,  600] loss: 0.699
[4 ,  900] loss: 0.689
[4 , 1200] loss: 0.699
[4 , 1500] loss: 0.691
[4 , 1800] loss: 0.691

Test set: Average loss: 0.0142, Accuracy: 8813/10000 (88%)

88.13
[5 ,  300] loss: 0.784
[5 ,  600] loss: 0.733
[5 ,  900] loss: 0.714
[5 , 1200] loss

[32m[I 2021-04-28 12:40:42,848][0m Trial 8 finished with value: 79.62 and parameters: {'lr': 0.023529162099618972, 'l1': 498, 'l2': 119}. Best is trial 4 with value: 97.39.[0m



Test set: Average loss: 0.0225, Accuracy: 7962/10000 (80%)

79.62
371 130 0.005386660765444729
[1 ,  300] loss: 0.872
[1 ,  600] loss: 0.381
[1 ,  900] loss: 0.311
[1 , 1200] loss: 0.290
[1 , 1500] loss: 0.265
[1 , 1800] loss: 0.230

Test set: Average loss: 0.0057, Accuracy: 9411/10000 (94%)

94.11
[2 ,  300] loss: 0.190
[2 ,  600] loss: 0.189
[2 ,  900] loss: 0.184
[2 , 1200] loss: 0.188
[2 , 1500] loss: 0.186
[2 , 1800] loss: 0.171

Test set: Average loss: 0.0043, Accuracy: 9559/10000 (96%)

95.59
[3 ,  300] loss: 0.139
[3 ,  600] loss: 0.136
[3 ,  900] loss: 0.145
[3 , 1200] loss: 0.145
[3 , 1500] loss: 0.150
[3 , 1800] loss: 0.137

Test set: Average loss: 0.0040, Accuracy: 9599/10000 (96%)

95.99
[4 ,  300] loss: 0.111
[4 ,  600] loss: 0.120
[4 ,  900] loss: 0.123
[4 , 1200] loss: 0.107
[4 , 1500] loss: 0.118
[4 , 1800] loss: 0.104

Test set: Average loss: 0.0033, Accuracy: 9664/10000 (97%)

96.64
[5 ,  300] loss: 0.100
[5 ,  600] loss: 0.097
[5 ,  900] loss: 0.103
[5 , 1200] loss

[32m[I 2021-04-28 12:42:52,564][0m Trial 9 finished with value: 97.7 and parameters: {'lr': 0.005386660765444729, 'l1': 371, 'l2': 130}. Best is trial 9 with value: 97.7.[0m



Test set: Average loss: 0.0025, Accuracy: 9770/10000 (98%)

97.7
387 241 0.0025182655146897487
[1 ,  300] loss: 1.012
[1 ,  600] loss: 0.421
[1 ,  900] loss: 0.337
[1 , 1200] loss: 0.288
[1 , 1500] loss: 0.248
[1 , 1800] loss: 0.235

Test set: Average loss: 0.0056, Accuracy: 9441/10000 (94%)

94.41
[2 ,  300] loss: 0.204
[2 ,  600] loss: 0.193
[2 ,  900] loss: 0.182
[2 , 1200] loss: 0.178
[2 , 1500] loss: 0.169
[2 , 1800] loss: 0.164

Test set: Average loss: 0.0046, Accuracy: 9519/10000 (95%)

95.19
[3 ,  300] loss: 0.129
[3 ,  600] loss: 0.136
[3 ,  900] loss: 0.132
[3 , 1200] loss: 0.129
[3 , 1500] loss: 0.137
[3 , 1800] loss: 0.130

Test set: Average loss: 0.0032, Accuracy: 9674/10000 (97%)

96.74
[4 ,  300] loss: 0.108
[4 ,  600] loss: 0.108
[4 ,  900] loss: 0.105
[4 , 1200] loss: 0.105
[4 , 1500] loss: 0.115
[4 , 1800] loss: 0.104

Test set: Average loss: 0.0027, Accuracy: 9738/10000 (97%)

97.38
[5 ,  300] loss: 0.097
[5 ,  600] loss: 0.088
[5 ,  900] loss: 0.101
[5 , 1200] loss

[32m[I 2021-04-28 12:44:59,102][0m Trial 10 finished with value: 97.99 and parameters: {'lr': 0.0025182655146897487, 'l1': 387, 'l2': 241}. Best is trial 10 with value: 97.99.[0m



Test set: Average loss: 0.0020, Accuracy: 9799/10000 (98%)

97.99
390 250 0.002042488450426808
[1 ,  300] loss: 1.088
[1 ,  600] loss: 0.429
[1 ,  900] loss: 0.371
[1 , 1200] loss: 0.313
[1 , 1500] loss: 0.259
[1 , 1800] loss: 0.246

Test set: Average loss: 0.0061, Accuracy: 9426/10000 (94%)

94.26
[2 ,  300] loss: 0.216
[2 ,  600] loss: 0.211
[2 ,  900] loss: 0.185
[2 , 1200] loss: 0.193
[2 , 1500] loss: 0.175
[2 , 1800] loss: 0.160

Test set: Average loss: 0.0041, Accuracy: 9614/10000 (96%)

96.14
[3 ,  300] loss: 0.142
[3 ,  600] loss: 0.155
[3 ,  900] loss: 0.139
[3 , 1200] loss: 0.139
[3 , 1500] loss: 0.130
[3 , 1800] loss: 0.134

Test set: Average loss: 0.0034, Accuracy: 9668/10000 (97%)

96.68
[4 ,  300] loss: 0.109
[4 ,  600] loss: 0.108
[4 ,  900] loss: 0.109
[4 , 1200] loss: 0.107
[4 , 1500] loss: 0.113
[4 , 1800] loss: 0.120

Test set: Average loss: 0.0028, Accuracy: 9711/10000 (97%)

97.11
[5 ,  300] loss: 0.094
[5 ,  600] loss: 0.106
[5 ,  900] loss: 0.105
[5 , 1200] loss

[32m[I 2021-04-28 12:47:04,891][0m Trial 11 finished with value: 98.0 and parameters: {'lr': 0.002042488450426808, 'l1': 390, 'l2': 250}. Best is trial 11 with value: 98.0.[0m



Test set: Average loss: 0.0020, Accuracy: 9800/10000 (98%)

98.0
422 262 0.001689939311969669
[1 ,  300] loss: 1.187
[1 ,  600] loss: 0.439
[1 ,  900] loss: 0.373
[1 , 1200] loss: 0.324
[1 , 1500] loss: 0.297
[1 , 1800] loss: 0.244

Test set: Average loss: 0.0062, Accuracy: 9407/10000 (94%)

94.07
[2 ,  300] loss: 0.230
[2 ,  600] loss: 0.224
[2 ,  900] loss: 0.198
[2 , 1200] loss: 0.185
[2 , 1500] loss: 0.182
[2 , 1800] loss: 0.176

Test set: Average loss: 0.0041, Accuracy: 9584/10000 (96%)

95.84
[3 ,  300] loss: 0.153
[3 ,  600] loss: 0.149
[3 ,  900] loss: 0.143
[3 , 1200] loss: 0.143
[3 , 1500] loss: 0.140
[3 , 1800] loss: 0.132

Test set: Average loss: 0.0036, Accuracy: 9663/10000 (97%)

96.63
[4 ,  300] loss: 0.120
[4 ,  600] loss: 0.115
[4 ,  900] loss: 0.116
[4 , 1200] loss: 0.113
[4 , 1500] loss: 0.114
[4 , 1800] loss: 0.116

Test set: Average loss: 0.0030, Accuracy: 9694/10000 (97%)

96.94
[5 ,  300] loss: 0.099
[5 ,  600] loss: 0.107
[5 ,  900] loss: 0.102
[5 , 1200] loss:

[32m[I 2021-04-28 12:49:12,549][0m Trial 12 finished with value: 97.92 and parameters: {'lr': 0.001689939311969669, 'l1': 422, 'l2': 262}. Best is trial 11 with value: 98.0.[0m



Test set: Average loss: 0.0021, Accuracy: 9792/10000 (98%)

97.92
341 324 0.001644397581601143
[1 ,  300] loss: 1.214
[1 ,  600] loss: 0.444
[1 ,  900] loss: 0.363
[1 , 1200] loss: 0.310
[1 , 1500] loss: 0.304
[1 , 1800] loss: 0.270

Test set: Average loss: 0.0068, Accuracy: 9323/10000 (93%)

93.23
[2 ,  300] loss: 0.226
[2 ,  600] loss: 0.218
[2 ,  900] loss: 0.208
[2 , 1200] loss: 0.196
[2 , 1500] loss: 0.187
[2 , 1800] loss: 0.169

Test set: Average loss: 0.0044, Accuracy: 9550/10000 (96%)

95.5
[3 ,  300] loss: 0.165
[3 ,  600] loss: 0.150
[3 ,  900] loss: 0.144
[3 , 1200] loss: 0.149
[3 , 1500] loss: 0.146
[3 , 1800] loss: 0.143

Test set: Average loss: 0.0035, Accuracy: 9649/10000 (96%)

96.49
[4 ,  300] loss: 0.123
[4 ,  600] loss: 0.123
[4 ,  900] loss: 0.117
[4 , 1200] loss: 0.112
[4 , 1500] loss: 0.111
[4 , 1800] loss: 0.129

Test set: Average loss: 0.0035, Accuracy: 9636/10000 (96%)

96.36
[5 ,  300] loss: 0.108
[5 ,  600] loss: 0.105
[5 ,  900] loss: 0.107
[5 , 1200] loss:

[32m[I 2021-04-28 12:51:22,203][0m Trial 13 finished with value: 97.74 and parameters: {'lr': 0.001644397581601143, 'l1': 341, 'l2': 324}. Best is trial 11 with value: 98.0.[0m



Test set: Average loss: 0.0022, Accuracy: 9774/10000 (98%)

97.74
507 213 0.0030378149485437633
[1 ,  300] loss: 0.945
[1 ,  600] loss: 0.406
[1 ,  900] loss: 0.306
[1 , 1200] loss: 0.271
[1 , 1500] loss: 0.248
[1 , 1800] loss: 0.232

Test set: Average loss: 0.0053, Accuracy: 9467/10000 (95%)

94.67
[2 ,  300] loss: 0.190
[2 ,  600] loss: 0.186
[2 ,  900] loss: 0.169
[2 , 1200] loss: 0.161
[2 , 1500] loss: 0.162
[2 , 1800] loss: 0.160

Test set: Average loss: 0.0048, Accuracy: 9481/10000 (95%)

94.81
[3 ,  300] loss: 0.129
[3 ,  600] loss: 0.130
[3 ,  900] loss: 0.118
[3 , 1200] loss: 0.124
[3 , 1500] loss: 0.120
[3 , 1800] loss: 0.133

Test set: Average loss: 0.0030, Accuracy: 9694/10000 (97%)

96.94
[4 ,  300] loss: 0.103
[4 ,  600] loss: 0.105
[4 ,  900] loss: 0.098
[4 , 1200] loss: 0.112
[4 , 1500] loss: 0.099
[4 , 1800] loss: 0.100

Test set: Average loss: 0.0028, Accuracy: 9719/10000 (97%)

97.19
[5 ,  300] loss: 0.101
[5 ,  600] loss: 0.086
[5 ,  900] loss: 0.088
[5 , 1200] los

[32m[I 2021-04-28 12:53:32,365][0m Trial 14 finished with value: 98.04 and parameters: {'lr': 0.0030378149485437633, 'l1': 507, 'l2': 213}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0021, Accuracy: 9804/10000 (98%)

98.04
502 193 0.00047130163899409134
[1 ,  300] loss: 1.931
[1 ,  600] loss: 0.838
[1 ,  900] loss: 0.568
[1 , 1200] loss: 0.451
[1 , 1500] loss: 0.412
[1 , 1800] loss: 0.391

Test set: Average loss: 0.0104, Accuracy: 9055/10000 (91%)

90.55
[2 ,  300] loss: 0.357
[2 ,  600] loss: 0.340
[2 ,  900] loss: 0.327
[2 , 1200] loss: 0.314
[2 , 1500] loss: 0.303
[2 , 1800] loss: 0.296

Test set: Average loss: 0.0081, Accuracy: 9263/10000 (93%)

92.63
[3 ,  300] loss: 0.268
[3 ,  600] loss: 0.261
[3 ,  900] loss: 0.268
[3 , 1200] loss: 0.250
[3 , 1500] loss: 0.244
[3 , 1800] loss: 0.230

Test set: Average loss: 0.0065, Accuracy: 9378/10000 (94%)

93.78
[4 ,  300] loss: 0.226
[4 ,  600] loss: 0.219
[4 ,  900] loss: 0.214
[4 , 1200] loss: 0.199
[4 , 1500] loss: 0.196
[4 , 1800] loss: 0.206

Test set: Average loss: 0.0055, Accuracy: 9482/10000 (95%)

94.82
[5 ,  300] loss: 0.189
[5 ,  600] loss: 0.175
[5 ,  900] loss: 0.174
[5 , 1200] lo

[32m[I 2021-04-28 12:55:42,081][0m Trial 15 finished with value: 97.05 and parameters: {'lr': 0.00047130163899409134, 'l1': 502, 'l2': 193}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0030, Accuracy: 9705/10000 (97%)

97.05
461 354 0.004097317045298728
[1 ,  300] loss: 0.868
[1 ,  600] loss: 0.390
[1 ,  900] loss: 0.314
[1 , 1200] loss: 0.270
[1 , 1500] loss: 0.246
[1 , 1800] loss: 0.214

Test set: Average loss: 0.0048, Accuracy: 9497/10000 (95%)

94.97
[2 ,  300] loss: 0.178
[2 ,  600] loss: 0.181
[2 ,  900] loss: 0.171
[2 , 1200] loss: 0.175
[2 , 1500] loss: 0.171
[2 , 1800] loss: 0.155

Test set: Average loss: 0.0042, Accuracy: 9576/10000 (96%)

95.76
[3 ,  300] loss: 0.137
[3 ,  600] loss: 0.139
[3 ,  900] loss: 0.134
[3 , 1200] loss: 0.119
[3 , 1500] loss: 0.136
[3 , 1800] loss: 0.113

Test set: Average loss: 0.0032, Accuracy: 9666/10000 (97%)

96.66
[4 ,  300] loss: 0.100
[4 ,  600] loss: 0.105
[4 ,  900] loss: 0.122
[4 , 1200] loss: 0.106
[4 , 1500] loss: 0.106
[4 , 1800] loss: 0.098

Test set: Average loss: 0.0027, Accuracy: 9726/10000 (97%)

97.26
[5 ,  300] loss: 0.089
[5 ,  600] loss: 0.093
[5 ,  900] loss: 0.095
[5 , 1200] loss

[32m[I 2021-04-28 12:57:50,436][0m Trial 16 finished with value: 97.26 and parameters: {'lr': 0.004097317045298728, 'l1': 461, 'l2': 354}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0026, Accuracy: 9726/10000 (97%)

97.26
297 200 0.0006110614542231088
[1 ,  300] loss: 1.773
[1 ,  600] loss: 0.742
[1 ,  900] loss: 0.517
[1 , 1200] loss: 0.439
[1 , 1500] loss: 0.383
[1 , 1800] loss: 0.360

Test set: Average loss: 0.0096, Accuracy: 9097/10000 (91%)

90.97
[2 ,  300] loss: 0.337
[2 ,  600] loss: 0.317
[2 ,  900] loss: 0.311
[2 , 1200] loss: 0.291
[2 , 1500] loss: 0.286
[2 , 1800] loss: 0.271

Test set: Average loss: 0.0072, Accuracy: 9345/10000 (93%)

93.45
[3 ,  300] loss: 0.241
[3 ,  600] loss: 0.249
[3 ,  900] loss: 0.230
[3 , 1200] loss: 0.227
[3 , 1500] loss: 0.222
[3 , 1800] loss: 0.230

Test set: Average loss: 0.0058, Accuracy: 9455/10000 (95%)

94.55
[4 ,  300] loss: 0.199
[4 ,  600] loss: 0.198
[4 ,  900] loss: 0.191
[4 , 1200] loss: 0.188
[4 , 1500] loss: 0.181
[4 , 1800] loss: 0.176

Test set: Average loss: 0.0048, Accuracy: 9547/10000 (95%)

95.47
[5 ,  300] loss: 0.161
[5 ,  600] loss: 0.162
[5 ,  900] loss: 0.173
[5 , 1200] los

[32m[I 2021-04-28 13:00:01,110][0m Trial 17 finished with value: 97.49 and parameters: {'lr': 0.0006110614542231088, 'l1': 297, 'l2': 200}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0027, Accuracy: 9749/10000 (97%)

97.49
447 331 0.09399188901328175
[1 ,  300] loss: 2.232
[1 ,  600] loss: 2.339
[1 ,  900] loss: 2.317
[1 , 1200] loss: 2.314
[1 , 1500] loss: 2.308
[1 , 1800] loss: 2.314

Test set: Average loss: 0.0723, Accuracy: 1010/10000 (10%)

10.1
[2 ,  300] loss: 2.314
[2 ,  600] loss: 2.314
[2 ,  900] loss: 2.315
[2 , 1200] loss: 2.317
[2 , 1500] loss: 2.312
[2 , 1800] loss: 2.313

Test set: Average loss: 0.0728, Accuracy: 1135/10000 (11%)

11.35
[3 ,  300] loss: 2.312
[3 ,  600] loss: 2.316
[3 ,  900] loss: 2.314
[3 , 1200] loss: 2.315
[3 , 1500] loss: 2.313
[3 , 1800] loss: 2.314

Test set: Average loss: 0.0723, Accuracy: 1135/10000 (11%)

11.35
[4 ,  300] loss: 2.314
[4 ,  600] loss: 2.312
[4 ,  900] loss: 2.316
[4 , 1200] loss: 2.315
[4 , 1500] loss: 2.312
[4 , 1800] loss: 2.317

Test set: Average loss: 0.0724, Accuracy: 1010/10000 (10%)

10.1
[5 ,  300] loss: 2.313
[5 ,  600] loss: 2.314
[5 ,  900] loss: 2.317
[5 , 1200] loss: 2

[32m[I 2021-04-28 13:02:13,080][0m Trial 18 finished with value: 9.8 and parameters: {'lr': 0.09399188901328175, 'l1': 447, 'l2': 331}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0724, Accuracy: 980/10000 (10%)

9.8
332 196 0.0001303529295646277
[1 ,  300] loss: 2.249
[1 ,  600] loss: 2.042
[1 ,  900] loss: 1.686
[1 , 1200] loss: 1.252
[1 , 1500] loss: 0.955
[1 , 1800] loss: 0.786

Test set: Average loss: 0.0200, Accuracy: 8431/10000 (84%)

84.31
[2 ,  300] loss: 0.662
[2 ,  600] loss: 0.603
[2 ,  900] loss: 0.548
[2 , 1200] loss: 0.515
[2 , 1500] loss: 0.481
[2 , 1800] loss: 0.480

Test set: Average loss: 0.0126, Accuracy: 8916/10000 (89%)

89.16
[3 ,  300] loss: 0.437
[3 ,  600] loss: 0.430
[3 ,  900] loss: 0.422
[3 , 1200] loss: 0.403
[3 , 1500] loss: 0.403
[3 , 1800] loss: 0.393

Test set: Average loss: 0.0106, Accuracy: 9056/10000 (91%)

90.56
[4 ,  300] loss: 0.387
[4 ,  600] loss: 0.371
[4 ,  900] loss: 0.366
[4 , 1200] loss: 0.352
[4 , 1500] loss: 0.368
[4 , 1800] loss: 0.355

Test set: Average loss: 0.0097, Accuracy: 9098/10000 (91%)

90.98
[5 ,  300] loss: 0.348
[5 ,  600] loss: 0.349
[5 ,  900] loss: 0.340
[5 , 1200] loss: 

[32m[I 2021-04-28 13:04:24,096][0m Trial 19 finished with value: 93.87 and parameters: {'lr': 0.0001303529295646277, 'l1': 332, 'l2': 196}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0067, Accuracy: 9387/10000 (94%)

93.87
491 296 0.007769121253405615
[1 ,  300] loss: 0.796
[1 ,  600] loss: 0.398
[1 ,  900] loss: 0.320
[1 , 1200] loss: 0.295
[1 , 1500] loss: 0.278
[1 , 1800] loss: 0.250

Test set: Average loss: 0.0055, Accuracy: 9447/10000 (94%)

94.47
[2 ,  300] loss: 0.221
[2 ,  600] loss: 0.214
[2 ,  900] loss: 0.206
[2 , 1200] loss: 0.185
[2 , 1500] loss: 0.171
[2 , 1800] loss: 0.188

Test set: Average loss: 0.0043, Accuracy: 9560/10000 (96%)

95.6
[3 ,  300] loss: 0.158
[3 ,  600] loss: 0.165
[3 ,  900] loss: 0.162
[3 , 1200] loss: 0.161
[3 , 1500] loss: 0.146
[3 , 1800] loss: 0.147

Test set: Average loss: 0.0033, Accuracy: 9656/10000 (97%)

96.56
[4 ,  300] loss: 0.125
[4 ,  600] loss: 0.135
[4 ,  900] loss: 0.124
[4 , 1200] loss: 0.124
[4 , 1500] loss: 0.130
[4 , 1800] loss: 0.123

Test set: Average loss: 0.0031, Accuracy: 9701/10000 (97%)

97.01
[5 ,  300] loss: 0.123
[5 ,  600] loss: 0.112
[5 ,  900] loss: 0.113
[5 , 1200] loss:

[32m[I 2021-04-28 13:06:34,787][0m Trial 20 finished with value: 97.47 and parameters: {'lr': 0.007769121253405615, 'l1': 491, 'l2': 296}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0027, Accuracy: 9747/10000 (97%)

97.47
399 230 0.0022757665847143245
[1 ,  300] loss: 1.059
[1 ,  600] loss: 0.421
[1 ,  900] loss: 0.334
[1 , 1200] loss: 0.306
[1 , 1500] loss: 0.257
[1 , 1800] loss: 0.246

Test set: Average loss: 0.0053, Accuracy: 9471/10000 (95%)

94.71
[2 ,  300] loss: 0.219
[2 ,  600] loss: 0.202
[2 ,  900] loss: 0.188
[2 , 1200] loss: 0.169
[2 , 1500] loss: 0.162
[2 , 1800] loss: 0.180

Test set: Average loss: 0.0038, Accuracy: 9634/10000 (96%)

96.34
[3 ,  300] loss: 0.151
[3 ,  600] loss: 0.133
[3 ,  900] loss: 0.140
[3 , 1200] loss: 0.137
[3 , 1500] loss: 0.135
[3 , 1800] loss: 0.128

Test set: Average loss: 0.0035, Accuracy: 9638/10000 (96%)

96.38
[4 ,  300] loss: 0.104
[4 ,  600] loss: 0.116
[4 ,  900] loss: 0.111
[4 , 1200] loss: 0.118
[4 , 1500] loss: 0.104
[4 , 1800] loss: 0.117

Test set: Average loss: 0.0030, Accuracy: 9695/10000 (97%)

96.95
[5 ,  300] loss: 0.092
[5 ,  600] loss: 0.088
[5 ,  900] loss: 0.104
[5 , 1200] los

[32m[I 2021-04-28 13:08:47,069][0m Trial 21 finished with value: 97.94 and parameters: {'lr': 0.0022757665847143245, 'l1': 399, 'l2': 230}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0021, Accuracy: 9794/10000 (98%)

97.94
392 183 0.0032838090565570784
[1 ,  300] loss: 0.972
[1 ,  600] loss: 0.385
[1 ,  900] loss: 0.323
[1 , 1200] loss: 0.273
[1 , 1500] loss: 0.248
[1 , 1800] loss: 0.219

Test set: Average loss: 0.0067, Accuracy: 9300/10000 (93%)

93.0
[2 ,  300] loss: 0.188
[2 ,  600] loss: 0.177
[2 ,  900] loss: 0.172
[2 , 1200] loss: 0.176
[2 , 1500] loss: 0.162
[2 , 1800] loss: 0.162

Test set: Average loss: 0.0046, Accuracy: 9541/10000 (95%)

95.41
[3 ,  300] loss: 0.130
[3 ,  600] loss: 0.137
[3 ,  900] loss: 0.135
[3 , 1200] loss: 0.133
[3 , 1500] loss: 0.129
[3 , 1800] loss: 0.122

Test set: Average loss: 0.0032, Accuracy: 9688/10000 (97%)

96.88
[4 ,  300] loss: 0.110
[4 ,  600] loss: 0.109
[4 ,  900] loss: 0.106
[4 , 1200] loss: 0.104
[4 , 1500] loss: 0.108
[4 , 1800] loss: 0.103

Test set: Average loss: 0.0026, Accuracy: 9748/10000 (97%)

97.48
[5 ,  300] loss: 0.101
[5 ,  600] loss: 0.088
[5 ,  900] loss: 0.100
[5 , 1200] loss

[32m[I 2021-04-28 13:10:58,223][0m Trial 22 finished with value: 97.92 and parameters: {'lr': 0.0032838090565570784, 'l1': 392, 'l2': 183}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0021, Accuracy: 9792/10000 (98%)

97.92
452 287 0.0010173335949942844
[1 ,  300] loss: 1.454
[1 ,  600] loss: 0.524
[1 ,  900] loss: 0.413
[1 , 1200] loss: 0.373
[1 , 1500] loss: 0.330
[1 , 1800] loss: 0.299

Test set: Average loss: 0.0080, Accuracy: 9246/10000 (92%)

92.46
[2 ,  300] loss: 0.280
[2 ,  600] loss: 0.276
[2 ,  900] loss: 0.249
[2 , 1200] loss: 0.217
[2 , 1500] loss: 0.204
[2 , 1800] loss: 0.204

Test set: Average loss: 0.0058, Accuracy: 9476/10000 (95%)

94.76
[3 ,  300] loss: 0.192
[3 ,  600] loss: 0.185
[3 ,  900] loss: 0.178
[3 , 1200] loss: 0.173
[3 , 1500] loss: 0.159
[3 , 1800] loss: 0.162

Test set: Average loss: 0.0041, Accuracy: 9594/10000 (96%)

95.94
[4 ,  300] loss: 0.140
[4 ,  600] loss: 0.152
[4 ,  900] loss: 0.140


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fcf58e20cb0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1324, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1316, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.7/multiprocessing/process.py", line 151, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fcf58e20cb0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1324, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1316, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/pytho

[4 , 1200] loss: 0.139
[4 , 1500] loss: 0.135
[4 , 1800] loss: 0.136


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fcf58e20cb0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1324, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1316, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.7/multiprocessing/process.py", line 151, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fcf58e20cb0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1324, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1316, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/pytho


Test set: Average loss: 0.0039, Accuracy: 9618/10000 (96%)

96.18


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fcf58e20cb0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1324, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1316, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.7/multiprocessing/process.py", line 151, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fcf58e20cb0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1324, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1316, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/pytho

[5 ,  300] loss: 0.116
[5 ,  600] loss: 0.127
[5 ,  900] loss: 0.118
[5 , 1200] loss: 0.117
[5 , 1500] loss: 0.106
[5 , 1800] loss: 0.117


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fcf58e20cb0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1324, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1316, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.7/multiprocessing/process.py", line 151, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fcf58e20cb0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1324, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1316, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/pytho


Test set: Average loss: 0.0032, Accuracy: 9693/10000 (97%)

96.93


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fcf58e20cb0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1324, in __del__
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fcf58e20cb0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1324, in __del__
    self._shutdown_workers()
    self._shutdown_workers()
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1316, in _shutdown_workers
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1316, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.7/multiprocessing/process.py", line 151, in is_alive
    if w.is_alive():
  File "/usr/lib/python3.7/multiprocessing/process.py", line 151, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child

[6 ,  300] loss: 0.108
[6 ,  600] loss: 0.102
[6 ,  900] loss: 0.106
[6 , 1200] loss: 0.097
[6 , 1500] loss: 0.093
[6 , 1800] loss: 0.103


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fcf58e20cb0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1324, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1316, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.7/multiprocessing/process.py", line 151, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fcf58e20cb0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1324, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1316, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/pytho


Test set: Average loss: 0.0029, Accuracy: 9710/10000 (97%)

97.1


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fcf58e20cb0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1324, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1316, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.7/multiprocessing/process.py", line 151, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fcf58e20cb0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1324, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1316, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/pytho

[7 ,  300] loss: 0.087
[7 ,  600] loss: 0.094
[7 ,  900] loss: 0.090
[7 , 1200] loss: 0.086
[7 , 1500] loss: 0.086
[7 , 1800] loss: 0.087


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fcf58e20cb0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1324, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1316, in _shutdown_workers
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fcf58e20cb0>
    if w.is_alive():
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1324, in __del__
  File "/usr/lib/python3.7/multiprocessing/process.py", line 151, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
    self._shutdown_workers()
AssertionError: can only test a child process
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1316, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/pytho


Test set: Average loss: 0.0028, Accuracy: 9702/10000 (97%)

97.02


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fcf58e20cb0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1324, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1316, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.7/multiprocessing/process.py", line 151, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fcf58e20cb0>
AssertionError: can only test a child process
Traceback (most recent call last):
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fcf58e20cb0>
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1324, in __del__
Traceback (most recent call last):
    self._shutdown_workers()
  File "/usr/local/lib/python3

[8 ,  300] loss: 0.080
[8 ,  600] loss: 0.076
[8 ,  900] loss: 0.090
[8 , 1200] loss: 0.075
[8 , 1500] loss: 0.080
[8 , 1800] loss: 0.079


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fcf58e20cb0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1324, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1316, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.7/multiprocessing/process.py", line 151, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fcf58e20cb0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1324, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1316, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/pytho


Test set: Average loss: 0.0025, Accuracy: 9743/10000 (97%)

97.43


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fcf58e20cb0>
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fcf58e20cb0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1324, in __del__
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1324, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1316, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.7/multiprocessing/process.py", line 151, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fcf58e20cb0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/

[9 ,  300] loss: 0.066
[9 ,  600] loss: 0.079
[9 ,  900] loss: 0.078
[9 , 1200] loss: 0.064
[9 , 1500] loss: 0.077
[9 , 1800] loss: 0.069

Test set: Average loss: 0.0026, Accuracy: 9730/10000 (97%)

97.3
[10 ,  300] loss: 0.060
[10 ,  600] loss: 0.061
[10 ,  900] loss: 0.068
[10 , 1200] loss: 0.065
[10 , 1500] loss: 0.069
[10 , 1800] loss: 0.071


[32m[I 2021-04-28 13:13:13,671][0m Trial 23 finished with value: 97.66 and parameters: {'lr': 0.0010173335949942844, 'l1': 452, 'l2': 287}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0023, Accuracy: 9766/10000 (98%)

97.66
343 225 0.0010091294364136917
[1 ,  300] loss: 1.525
[1 ,  600] loss: 0.545
[1 ,  900] loss: 0.429
[1 , 1200] loss: 0.371
[1 , 1500] loss: 0.347
[1 , 1800] loss: 0.320

Test set: Average loss: 0.0084, Accuracy: 9182/10000 (92%)

91.82
[2 ,  300] loss: 0.280
[2 ,  600] loss: 0.272
[2 ,  900] loss: 0.254
[2 , 1200] loss: 0.234
[2 , 1500] loss: 0.217
[2 , 1800] loss: 0.212

Test set: Average loss: 0.0053, Accuracy: 9481/10000 (95%)

94.81
[3 ,  300] loss: 0.185
[3 ,  600] loss: 0.188
[3 ,  900] loss: 0.187
[3 , 1200] loss: 0.176
[3 , 1500] loss: 0.173
[3 , 1800] loss: 0.164

Test set: Average loss: 0.0041, Accuracy: 9600/10000 (96%)

96.0
[4 ,  300] loss: 0.156
[4 ,  600] loss: 0.149
[4 ,  900] loss: 0.143
[4 , 1200] loss: 0.140
[4 , 1500] loss: 0.141
[4 , 1800] loss: 0.136

Test set: Average loss: 0.0036, Accuracy: 9657/10000 (97%)

96.57
[5 ,  300] loss: 0.118
[5 ,  600] loss: 0.124
[5 ,  900] loss: 0.125
[5 , 1200] loss

[32m[I 2021-04-28 13:15:25,047][0m Trial 24 finished with value: 97.56 and parameters: {'lr': 0.0010091294364136917, 'l1': 343, 'l2': 225}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0024, Accuracy: 9756/10000 (98%)

97.56
309 160 0.002178274448219048
[1 ,  300] loss: 1.109
[1 ,  600] loss: 0.428
[1 ,  900] loss: 0.339
[1 , 1200] loss: 0.304
[1 , 1500] loss: 0.268
[1 , 1800] loss: 0.267

Test set: Average loss: 0.0060, Accuracy: 9453/10000 (95%)

94.53
[2 ,  300] loss: 0.204
[2 ,  600] loss: 0.204
[2 ,  900] loss: 0.194
[2 , 1200] loss: 0.183
[2 , 1500] loss: 0.173
[2 , 1800] loss: 0.187

Test set: Average loss: 0.0040, Accuracy: 9625/10000 (96%)

96.25
[3 ,  300] loss: 0.153
[3 ,  600] loss: 0.145
[3 ,  900] loss: 0.151
[3 , 1200] loss: 0.147
[3 , 1500] loss: 0.125
[3 , 1800] loss: 0.135

Test set: Average loss: 0.0038, Accuracy: 9618/10000 (96%)

96.18
[4 ,  300] loss: 0.124
[4 ,  600] loss: 0.115
[4 ,  900] loss: 0.127
[4 , 1200] loss: 0.117
[4 , 1500] loss: 0.113
[4 , 1800] loss: 0.110

Test set: Average loss: 0.0028, Accuracy: 9713/10000 (97%)

97.13
[5 ,  300] loss: 0.104
[5 ,  600] loss: 0.108
[5 ,  900] loss: 0.093
[5 , 1200] loss

[32m[I 2021-04-28 13:17:35,014][0m Trial 25 finished with value: 97.49 and parameters: {'lr': 0.002178274448219048, 'l1': 309, 'l2': 160}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0024, Accuracy: 9749/10000 (97%)

97.49
420 245 0.01382551668107953
[1 ,  300] loss: 0.771
[1 ,  600] loss: 0.492
[1 ,  900] loss: 0.451
[1 , 1200] loss: 0.414
[1 , 1500] loss: 0.394
[1 , 1800] loss: 0.374

Test set: Average loss: 0.0084, Accuracy: 9190/10000 (92%)

91.9
[2 ,  300] loss: 0.327
[2 ,  600] loss: 0.324
[2 ,  900] loss: 0.315
[2 , 1200] loss: 0.305
[2 , 1500] loss: 0.304
[2 , 1800] loss: 0.289

Test set: Average loss: 0.0074, Accuracy: 9292/10000 (93%)

92.92
[3 ,  300] loss: 0.300
[3 ,  600] loss: 0.274
[3 ,  900] loss: 0.274
[3 , 1200] loss: 0.259
[3 , 1500] loss: 0.276
[3 , 1800] loss: 0.240

Test set: Average loss: 0.0053, Accuracy: 9485/10000 (95%)

94.85
[4 ,  300] loss: 0.228
[4 ,  600] loss: 0.245
[4 ,  900] loss: 0.254
[4 , 1200] loss: 0.238
[4 , 1500] loss: 0.242
[4 , 1800] loss: 0.251

Test set: Average loss: 0.0051, Accuracy: 9515/10000 (95%)

95.15
[5 ,  300] loss: 0.237
[5 ,  600] loss: 0.235
[5 ,  900] loss: 0.258
[5 , 1200] loss: 

[32m[I 2021-04-28 13:19:44,854][0m Trial 26 finished with value: 95.79 and parameters: {'lr': 0.01382551668107953, 'l1': 420, 'l2': 245}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0046, Accuracy: 9579/10000 (96%)

95.79
366 31 0.0057829600728157154
[1 ,  300] loss: 0.894
[1 ,  600] loss: 0.410
[1 ,  900] loss: 0.331
[1 , 1200] loss: 0.313
[1 , 1500] loss: 0.293
[1 , 1800] loss: 0.263

Test set: Average loss: 0.0055, Accuracy: 9457/10000 (95%)

94.57
[2 ,  300] loss: 0.229
[2 ,  600] loss: 0.207
[2 ,  900] loss: 0.221
[2 , 1200] loss: 0.197
[2 , 1500] loss: 0.208
[2 , 1800] loss: 0.191

Test set: Average loss: 0.0047, Accuracy: 9538/10000 (95%)

95.38
[3 ,  300] loss: 0.171
[3 ,  600] loss: 0.166
[3 ,  900] loss: 0.165
[3 , 1200] loss: 0.166
[3 , 1500] loss: 0.159
[3 , 1800] loss: 0.174

Test set: Average loss: 0.0038, Accuracy: 9624/10000 (96%)

96.24
[4 ,  300] loss: 0.140
[4 ,  600] loss: 0.146
[4 ,  900] loss: 0.150
[4 , 1200] loss: 0.134
[4 , 1500] loss: 0.139
[4 , 1800] loss: 0.135

Test set: Average loss: 0.0036, Accuracy: 9642/10000 (96%)

96.42
[5 ,  300] loss: 0.133
[5 ,  600] loss: 0.118
[5 ,  900] loss: 0.127
[5 , 1200] loss

[32m[I 2021-04-28 13:21:54,156][0m Trial 27 finished with value: 97.63 and parameters: {'lr': 0.0057829600728157154, 'l1': 366, 'l2': 31}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0026, Accuracy: 9763/10000 (98%)

97.63
509 491 0.0029959565799832265
[1 ,  300] loss: 0.933
[1 ,  600] loss: 0.406
[1 ,  900] loss: 0.310
[1 , 1200] loss: 0.275
[1 , 1500] loss: 0.250
[1 , 1800] loss: 0.221

Test set: Average loss: 0.0051, Accuracy: 9496/10000 (95%)

94.96
[2 ,  300] loss: 0.179
[2 ,  600] loss: 0.160
[2 ,  900] loss: 0.176
[2 , 1200] loss: 0.173
[2 , 1500] loss: 0.159
[2 , 1800] loss: 0.154

Test set: Average loss: 0.0037, Accuracy: 9642/10000 (96%)

96.42
[3 ,  300] loss: 0.126
[3 ,  600] loss: 0.128
[3 ,  900] loss: 0.133
[3 , 1200] loss: 0.118
[3 , 1500] loss: 0.123
[3 , 1800] loss: 0.111

Test set: Average loss: 0.0031, Accuracy: 9700/10000 (97%)

97.0
[4 ,  300] loss: 0.106
[4 ,  600] loss: 0.106
[4 ,  900] loss: 0.103
[4 , 1200] loss: 0.106
[4 , 1500] loss: 0.103
[4 , 1800] loss: 0.100

Test set: Average loss: 0.0028, Accuracy: 9718/10000 (97%)

97.18
[5 ,  300] loss: 0.090
[5 ,  600] loss: 0.094
[5 ,  900] loss: 0.093
[5 , 1200] loss

[32m[I 2021-04-28 13:24:04,986][0m Trial 28 finished with value: 97.83 and parameters: {'lr': 0.0029959565799832265, 'l1': 509, 'l2': 491}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0020, Accuracy: 9783/10000 (98%)

97.83
261 158 0.0012909773128109417
[1 ,  300] loss: 1.399
[1 ,  600] loss: 0.498
[1 ,  900] loss: 0.393
[1 , 1200] loss: 0.369
[1 , 1500] loss: 0.333
[1 , 1800] loss: 0.294

Test set: Average loss: 0.0077, Accuracy: 9259/10000 (93%)

92.59
[2 ,  300] loss: 0.262
[2 ,  600] loss: 0.250
[2 ,  900] loss: 0.225
[2 , 1200] loss: 0.212
[2 , 1500] loss: 0.219
[2 , 1800] loss: 0.200

Test set: Average loss: 0.0048, Accuracy: 9539/10000 (95%)

95.39
[3 ,  300] loss: 0.184
[3 ,  600] loss: 0.179
[3 ,  900] loss: 0.180
[3 , 1200] loss: 0.169
[3 , 1500] loss: 0.158
[3 , 1800] loss: 0.154

Test set: Average loss: 0.0040, Accuracy: 9594/10000 (96%)

95.94
[4 ,  300] loss: 0.144
[4 ,  600] loss: 0.134
[4 ,  900] loss: 0.146
[4 , 1200] loss: 0.131
[4 , 1500] loss: 0.132
[4 , 1800] loss: 0.142

Test set: Average loss: 0.0033, Accuracy: 9674/10000 (97%)

96.74
[5 ,  300] loss: 0.118
[5 ,  600] loss: 0.118
[5 ,  900] loss: 0.114
[5 , 1200] los

[32m[I 2021-04-28 13:26:15,382][0m Trial 29 finished with value: 97.67 and parameters: {'lr': 0.0012909773128109417, 'l1': 261, 'l2': 158}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0024, Accuracy: 9767/10000 (98%)

97.67
469 289 0.0006566897077377032
[1 ,  300] loss: 1.766
[1 ,  600] loss: 0.667
[1 ,  900] loss: 0.479
[1 , 1200] loss: 0.412
[1 , 1500] loss: 0.376
[1 , 1800] loss: 0.356

Test set: Average loss: 0.0095, Accuracy: 9106/10000 (91%)

91.06
[2 ,  300] loss: 0.317
[2 ,  600] loss: 0.293
[2 ,  900] loss: 0.294
[2 , 1200] loss: 0.277
[2 , 1500] loss: 0.258
[2 , 1800] loss: 0.263

Test set: Average loss: 0.0072, Accuracy: 9308/10000 (93%)

93.08
[3 ,  300] loss: 0.229
[3 ,  600] loss: 0.232
[3 ,  900] loss: 0.205
[3 , 1200] loss: 0.212
[3 , 1500] loss: 0.206
[3 , 1800] loss: 0.200

Test set: Average loss: 0.0053, Accuracy: 9495/10000 (95%)

94.95
[4 ,  300] loss: 0.186
[4 ,  600] loss: 0.178
[4 ,  900] loss: 0.175
[4 , 1200] loss: 0.170
[4 , 1500] loss: 0.168
[4 , 1800] loss: 0.158

Test set: Average loss: 0.0045, Accuracy: 9551/10000 (96%)

95.51
[5 ,  300] loss: 0.166
[5 ,  600] loss: 0.146
[5 ,  900] loss: 0.142
[5 , 1200] los

[32m[I 2021-04-28 13:28:25,400][0m Trial 30 finished with value: 97.36 and parameters: {'lr': 0.0006566897077377032, 'l1': 469, 'l2': 289}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0027, Accuracy: 9736/10000 (97%)

97.36
399 225 0.002710927992739678
[1 ,  300] loss: 1.005
[1 ,  600] loss: 0.412
[1 ,  900] loss: 0.336
[1 , 1200] loss: 0.291
[1 , 1500] loss: 0.252
[1 , 1800] loss: 0.221

Test set: Average loss: 0.0057, Accuracy: 9433/10000 (94%)

94.33
[2 ,  300] loss: 0.191
[2 ,  600] loss: 0.186
[2 ,  900] loss: 0.192
[2 , 1200] loss: 0.160
[2 , 1500] loss: 0.177
[2 , 1800] loss: 0.147

Test set: Average loss: 0.0044, Accuracy: 9567/10000 (96%)

95.67
[3 ,  300] loss: 0.138
[3 ,  600] loss: 0.138
[3 ,  900] loss: 0.138
[3 , 1200] loss: 0.142
[3 , 1500] loss: 0.126
[3 , 1800] loss: 0.118

Test set: Average loss: 0.0032, Accuracy: 9683/10000 (97%)

96.83
[4 ,  300] loss: 0.117
[4 ,  600] loss: 0.110
[4 ,  900] loss: 0.100
[4 , 1200] loss: 0.107
[4 , 1500] loss: 0.124
[4 , 1800] loss: 0.094

Test set: Average loss: 0.0028, Accuracy: 9709/10000 (97%)

97.09
[5 ,  300] loss: 0.088
[5 ,  600] loss: 0.090
[5 ,  900] loss: 0.085
[5 , 1200] loss

[32m[I 2021-04-28 13:30:34,110][0m Trial 31 finished with value: 97.77 and parameters: {'lr': 0.002710927992739678, 'l1': 399, 'l2': 225}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0024, Accuracy: 9777/10000 (98%)

97.77
422 239 0.0043207318637762376
[1 ,  300] loss: 0.869
[1 ,  600] loss: 0.402
[1 ,  900] loss: 0.305
[1 , 1200] loss: 0.266
[1 , 1500] loss: 0.237
[1 , 1800] loss: 0.212

Test set: Average loss: 0.0049, Accuracy: 9513/10000 (95%)

95.13
[2 ,  300] loss: 0.190
[2 ,  600] loss: 0.180
[2 ,  900] loss: 0.186
[2 , 1200] loss: 0.168
[2 , 1500] loss: 0.161
[2 , 1800] loss: 0.163

Test set: Average loss: 0.0039, Accuracy: 9604/10000 (96%)

96.04
[3 ,  300] loss: 0.141
[3 ,  600] loss: 0.129
[3 ,  900] loss: 0.131
[3 , 1200] loss: 0.116
[3 , 1500] loss: 0.123
[3 , 1800] loss: 0.127

Test set: Average loss: 0.0028, Accuracy: 9710/10000 (97%)

97.1
[4 ,  300] loss: 0.109
[4 ,  600] loss: 0.108
[4 ,  900] loss: 0.109
[4 , 1200] loss: 0.115
[4 , 1500] loss: 0.108
[4 , 1800] loss: 0.098

Test set: Average loss: 0.0031, Accuracy: 9689/10000 (97%)

96.89
[5 ,  300] loss: 0.094
[5 ,  600] loss: 0.087
[5 ,  900] loss: 0.109
[5 , 1200] loss

[32m[I 2021-04-28 13:32:42,500][0m Trial 32 finished with value: 97.93 and parameters: {'lr': 0.0043207318637762376, 'l1': 422, 'l2': 239}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0022, Accuracy: 9793/10000 (98%)

97.93
380 350 0.0020024192965703803
[1 ,  300] loss: 1.102
[1 ,  600] loss: 0.429
[1 ,  900] loss: 0.356
[1 , 1200] loss: 0.315
[1 , 1500] loss: 0.270
[1 , 1800] loss: 0.247

Test set: Average loss: 0.0057, Accuracy: 9441/10000 (94%)

94.41
[2 ,  300] loss: 0.210
[2 ,  600] loss: 0.190
[2 ,  900] loss: 0.195
[2 , 1200] loss: 0.173
[2 , 1500] loss: 0.186
[2 , 1800] loss: 0.169

Test set: Average loss: 0.0042, Accuracy: 9585/10000 (96%)

95.85
[3 ,  300] loss: 0.144
[3 ,  600] loss: 0.142
[3 ,  900] loss: 0.136
[3 , 1200] loss: 0.129
[3 , 1500] loss: 0.145
[3 , 1800] loss: 0.132

Test set: Average loss: 0.0036, Accuracy: 9625/10000 (96%)

96.25
[4 ,  300] loss: 0.118
[4 ,  600] loss: 0.112
[4 ,  900] loss: 0.105
[4 , 1200] loss: 0.115
[4 , 1500] loss: 0.106
[4 , 1800] loss: 0.110

Test set: Average loss: 0.0030, Accuracy: 9703/10000 (97%)

97.03
[5 ,  300] loss: 0.097
[5 ,  600] loss: 0.090
[5 ,  900] loss: 0.098
[5 , 1200] los

[32m[I 2021-04-28 13:34:49,328][0m Trial 33 finished with value: 97.83 and parameters: {'lr': 0.0020024192965703803, 'l1': 380, 'l2': 350}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0020, Accuracy: 9783/10000 (98%)

97.83
292 281 0.002683629144165228
[1 ,  300] loss: 1.036
[1 ,  600] loss: 0.403
[1 ,  900] loss: 0.336
[1 , 1200] loss: 0.284
[1 , 1500] loss: 0.258
[1 , 1800] loss: 0.217

Test set: Average loss: 0.0054, Accuracy: 9477/10000 (95%)

94.77
[2 ,  300] loss: 0.198
[2 ,  600] loss: 0.192
[2 ,  900] loss: 0.182
[2 , 1200] loss: 0.174
[2 , 1500] loss: 0.169
[2 , 1800] loss: 0.173

Test set: Average loss: 0.0044, Accuracy: 9548/10000 (95%)

95.48
[3 ,  300] loss: 0.147
[3 ,  600] loss: 0.141
[3 ,  900] loss: 0.140
[3 , 1200] loss: 0.138
[3 , 1500] loss: 0.141
[3 , 1800] loss: 0.117

Test set: Average loss: 0.0033, Accuracy: 9656/10000 (97%)

96.56
[4 ,  300] loss: 0.120
[4 ,  600] loss: 0.111
[4 ,  900] loss: 0.112
[4 , 1200] loss: 0.107
[4 , 1500] loss: 0.116
[4 , 1800] loss: 0.105

Test set: Average loss: 0.0032, Accuracy: 9674/10000 (97%)

96.74
[5 ,  300] loss: 0.096
[5 ,  600] loss: 0.102
[5 ,  900] loss: 0.088
[5 , 1200] loss

[32m[I 2021-04-28 13:36:55,879][0m Trial 34 finished with value: 98.0 and parameters: {'lr': 0.002683629144165228, 'l1': 292, 'l2': 281}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0020, Accuracy: 9800/10000 (98%)

98.0
275 307 0.00021836253524519004
[1 ,  300] loss: 2.173
[1 ,  600] loss: 1.649
[1 ,  900] loss: 1.006
[1 , 1200] loss: 0.727
[1 , 1500] loss: 0.587
[1 , 1800] loss: 0.533

Test set: Average loss: 0.0134, Accuracy: 8815/10000 (88%)

88.15
[2 ,  300] loss: 0.465
[2 ,  600] loss: 0.451
[2 ,  900] loss: 0.426
[2 , 1200] loss: 0.413
[2 , 1500] loss: 0.395
[2 , 1800] loss: 0.383

Test set: Average loss: 0.0101, Accuracy: 9078/10000 (91%)

90.78
[3 ,  300] loss: 0.371
[3 ,  600] loss: 0.378
[3 ,  900] loss: 0.339
[3 , 1200] loss: 0.335
[3 , 1500] loss: 0.334
[3 , 1800] loss: 0.318

Test set: Average loss: 0.0089, Accuracy: 9177/10000 (92%)

91.77
[4 ,  300] loss: 0.322
[4 ,  600] loss: 0.313
[4 ,  900] loss: 0.298
[4 , 1200] loss: 0.295
[4 , 1500] loss: 0.300
[4 , 1800] loss: 0.288

Test set: Average loss: 0.0080, Accuracy: 9262/10000 (93%)

92.62
[5 ,  300] loss: 0.282
[5 ,  600] loss: 0.279
[5 ,  900] loss: 0.266
[5 , 1200] los

[32m[I 2021-04-28 13:39:02,413][0m Trial 35 finished with value: 95.43 and parameters: {'lr': 0.00021836253524519004, 'l1': 275, 'l2': 307}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0048, Accuracy: 9543/10000 (95%)

95.43
242 275 0.006457444432559887
[1 ,  300] loss: 0.834
[1 ,  600] loss: 0.411
[1 ,  900] loss: 0.337
[1 , 1200] loss: 0.309
[1 , 1500] loss: 0.292
[1 , 1800] loss: 0.232

Test set: Average loss: 0.0056, Accuracy: 9446/10000 (94%)

94.46
[2 ,  300] loss: 0.221
[2 ,  600] loss: 0.220
[2 ,  900] loss: 0.224
[2 , 1200] loss: 0.207
[2 , 1500] loss: 0.192
[2 , 1800] loss: 0.175

Test set: Average loss: 0.0042, Accuracy: 9583/10000 (96%)

95.83
[3 ,  300] loss: 0.167
[3 ,  600] loss: 0.179
[3 ,  900] loss: 0.172
[3 , 1200] loss: 0.166
[3 , 1500] loss: 0.163
[3 , 1800] loss: 0.162

Test set: Average loss: 0.0037, Accuracy: 9629/10000 (96%)

96.29
[4 ,  300] loss: 0.136
[4 ,  600] loss: 0.130
[4 ,  900] loss: 0.134
[4 , 1200] loss: 0.134
[4 , 1500] loss: 0.139
[4 , 1800] loss: 0.141

Test set: Average loss: 0.0032, Accuracy: 9679/10000 (97%)

96.79
[5 ,  300] loss: 0.121
[5 ,  600] loss: 0.107
[5 ,  900] loss: 0.124
[5 , 1200] loss

[32m[I 2021-04-28 13:41:08,588][0m Trial 36 finished with value: 97.57 and parameters: {'lr': 0.006457444432559887, 'l1': 242, 'l2': 275}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0025, Accuracy: 9757/10000 (98%)

97.57
307 401 0.0007157438046619174
[1 ,  300] loss: 1.704
[1 ,  600] loss: 0.625
[1 ,  900] loss: 0.467
[1 , 1200] loss: 0.399
[1 , 1500] loss: 0.357
[1 , 1800] loss: 0.346

Test set: Average loss: 0.0089, Accuracy: 9167/10000 (92%)

91.67
[2 ,  300] loss: 0.302
[2 ,  600] loss: 0.305
[2 ,  900] loss: 0.291
[2 , 1200] loss: 0.272
[2 , 1500] loss: 0.254
[2 , 1800] loss: 0.244

Test set: Average loss: 0.0064, Accuracy: 9386/10000 (94%)

93.86
[3 ,  300] loss: 0.231
[3 ,  600] loss: 0.217
[3 ,  900] loss: 0.214
[3 , 1200] loss: 0.209
[3 , 1500] loss: 0.202
[3 , 1800] loss: 0.190

Test set: Average loss: 0.0052, Accuracy: 9498/10000 (95%)

94.98
[4 ,  300] loss: 0.180
[4 ,  600] loss: 0.181
[4 ,  900] loss: 0.173
[4 , 1200] loss: 0.166
[4 , 1500] loss: 0.163
[4 , 1800] loss: 0.163

Test set: Average loss: 0.0047, Accuracy: 9534/10000 (95%)

95.34
[5 ,  300] loss: 0.146
[5 ,  600] loss: 0.145
[5 ,  900] loss: 0.150
[5 , 1200] los

[32m[I 2021-04-28 13:43:13,837][0m Trial 37 finished with value: 97.23 and parameters: {'lr': 0.0007157438046619174, 'l1': 307, 'l2': 401}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0027, Accuracy: 9723/10000 (97%)

97.23
106 363 0.0013917828972645483
[1 ,  300] loss: 1.363
[1 ,  600] loss: 0.516
[1 ,  900] loss: 0.418
[1 , 1200] loss: 0.361
[1 , 1500] loss: 0.332
[1 , 1800] loss: 0.289

Test set: Average loss: 0.0076, Accuracy: 9281/10000 (93%)

92.81
[2 ,  300] loss: 0.274
[2 ,  600] loss: 0.258
[2 ,  900] loss: 0.244
[2 , 1200] loss: 0.237
[2 , 1500] loss: 0.217
[2 , 1800] loss: 0.221

Test set: Average loss: 0.0050, Accuracy: 9534/10000 (95%)

95.34
[3 ,  300] loss: 0.198
[3 ,  600] loss: 0.206
[3 ,  900] loss: 0.195
[3 , 1200] loss: 0.182
[3 , 1500] loss: 0.175
[3 , 1800] loss: 0.174

Test set: Average loss: 0.0040, Accuracy: 9593/10000 (96%)

95.93
[4 ,  300] loss: 0.164
[4 ,  600] loss: 0.160
[4 ,  900] loss: 0.157
[4 , 1200] loss: 0.168
[4 , 1500] loss: 0.148
[4 , 1800] loss: 0.146

Test set: Average loss: 0.0039, Accuracy: 9610/10000 (96%)

96.1
[5 ,  300] loss: 0.138
[5 ,  600] loss: 0.142
[5 ,  900] loss: 0.140
[5 , 1200] loss

[32m[I 2021-04-28 13:45:19,245][0m Trial 38 finished with value: 97.55 and parameters: {'lr': 0.0013917828972645483, 'l1': 106, 'l2': 363}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0025, Accuracy: 9755/10000 (98%)

97.55
349 59 0.011357887479441647
[1 ,  300] loss: 0.841
[1 ,  600] loss: 0.453
[1 ,  900] loss: 0.431
[1 , 1200] loss: 0.396
[1 , 1500] loss: 0.358
[1 , 1800] loss: 0.348

Test set: Average loss: 0.0088, Accuracy: 9124/10000 (91%)

91.24
[2 ,  300] loss: 0.329
[2 ,  600] loss: 0.316
[2 ,  900] loss: 0.297
[2 , 1200] loss: 0.325
[2 , 1500] loss: 0.292
[2 , 1800] loss: 0.300

Test set: Average loss: 0.0059, Accuracy: 9418/10000 (94%)

94.18
[3 ,  300] loss: 0.267
[3 ,  600] loss: 0.245
[3 ,  900] loss: 0.241
[3 , 1200] loss: 0.231
[3 , 1500] loss: 0.241
[3 , 1800] loss: 0.230

Test set: Average loss: 0.0074, Accuracy: 9272/10000 (93%)

92.72
[4 ,  300] loss: 0.239
[4 ,  600] loss: 0.224
[4 ,  900] loss: 0.220
[4 , 1200] loss: 0.221
[4 , 1500] loss: 0.231
[4 , 1800] loss: 0.222

Test set: Average loss: 0.0054, Accuracy: 9491/10000 (95%)

94.91
[5 ,  300] loss: 0.204
[5 ,  600] loss: 0.208
[5 ,  900] loss: 0.194
[5 , 1200] loss:

[32m[I 2021-04-28 13:47:24,229][0m Trial 39 finished with value: 96.81 and parameters: {'lr': 0.011357887479441647, 'l1': 349, 'l2': 59}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0036, Accuracy: 9681/10000 (97%)

96.81
203 265 0.00390806999976086
[1 ,  300] loss: 0.930
[1 ,  600] loss: 0.424
[1 ,  900] loss: 0.349
[1 , 1200] loss: 0.283
[1 , 1500] loss: 0.265
[1 , 1800] loss: 0.243

Test set: Average loss: 0.0053, Accuracy: 9486/10000 (95%)

94.86
[2 ,  300] loss: 0.219
[2 ,  600] loss: 0.206
[2 ,  900] loss: 0.186
[2 , 1200] loss: 0.181
[2 , 1500] loss: 0.191
[2 , 1800] loss: 0.163

Test set: Average loss: 0.0047, Accuracy: 9527/10000 (95%)

95.27
[3 ,  300] loss: 0.156
[3 ,  600] loss: 0.152
[3 ,  900] loss: 0.151
[3 , 1200] loss: 0.141
[3 , 1500] loss: 0.144
[3 , 1800] loss: 0.138

Test set: Average loss: 0.0033, Accuracy: 9682/10000 (97%)

96.82
[4 ,  300] loss: 0.117
[4 ,  600] loss: 0.127
[4 ,  900] loss: 0.126
[4 , 1200] loss: 0.133
[4 , 1500] loss: 0.119
[4 , 1800] loss: 0.122

Test set: Average loss: 0.0027, Accuracy: 9723/10000 (97%)

97.23
[5 ,  300] loss: 0.107
[5 ,  600] loss: 0.096
[5 ,  900] loss: 0.105
[5 , 1200] loss:

[32m[I 2021-04-28 13:49:28,833][0m Trial 40 finished with value: 96.74 and parameters: {'lr': 0.00390806999976086, 'l1': 203, 'l2': 265}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0033, Accuracy: 9674/10000 (97%)

96.74
406 216 0.0023204509313986305
[1 ,  300] loss: 1.072
[1 ,  600] loss: 0.423
[1 ,  900] loss: 0.356
[1 , 1200] loss: 0.303
[1 , 1500] loss: 0.263
[1 , 1800] loss: 0.230

Test set: Average loss: 0.0061, Accuracy: 9397/10000 (94%)

93.97
[2 ,  300] loss: 0.205
[2 ,  600] loss: 0.190
[2 ,  900] loss: 0.186
[2 , 1200] loss: 0.177
[2 , 1500] loss: 0.171
[2 , 1800] loss: 0.167

Test set: Average loss: 0.0039, Accuracy: 9595/10000 (96%)

95.95
[3 ,  300] loss: 0.145
[3 ,  600] loss: 0.138
[3 ,  900] loss: 0.141
[3 , 1200] loss: 0.140
[3 , 1500] loss: 0.120
[3 , 1800] loss: 0.128

Test set: Average loss: 0.0031, Accuracy: 9686/10000 (97%)

96.86
[4 ,  300] loss: 0.106
[4 ,  600] loss: 0.107
[4 ,  900] loss: 0.119
[4 , 1200] loss: 0.111
[4 , 1500] loss: 0.108
[4 , 1800] loss: 0.104

Test set: Average loss: 0.0031, Accuracy: 9674/10000 (97%)

96.74
[5 ,  300] loss: 0.101
[5 ,  600] loss: 0.095
[5 ,  900] loss: 0.094
[5 , 1200] los

[32m[I 2021-04-28 13:51:35,059][0m Trial 41 finished with value: 97.92 and parameters: {'lr': 0.0023204509313986305, 'l1': 406, 'l2': 216}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0021, Accuracy: 9792/10000 (98%)

97.92
436 179 0.002796646616286064
[1 ,  300] loss: 0.978
[1 ,  600] loss: 0.394
[1 ,  900] loss: 0.323
[1 , 1200] loss: 0.299
[1 , 1500] loss: 0.247
[1 , 1800] loss: 0.228

Test set: Average loss: 0.0064, Accuracy: 9368/10000 (94%)

93.68
[2 ,  300] loss: 0.193
[2 ,  600] loss: 0.180
[2 ,  900] loss: 0.173
[2 , 1200] loss: 0.164
[2 , 1500] loss: 0.159
[2 , 1800] loss: 0.172

Test set: Average loss: 0.0039, Accuracy: 9618/10000 (96%)

96.18
[3 ,  300] loss: 0.142
[3 ,  600] loss: 0.134
[3 ,  900] loss: 0.131
[3 , 1200] loss: 0.130
[3 , 1500] loss: 0.118
[3 , 1800] loss: 0.124

Test set: Average loss: 0.0030, Accuracy: 9696/10000 (97%)

96.96
[4 ,  300] loss: 0.101
[4 ,  600] loss: 0.118
[4 ,  900] loss: 0.107
[4 , 1200] loss: 0.104
[4 , 1500] loss: 0.104
[4 , 1800] loss: 0.105

Test set: Average loss: 0.0026, Accuracy: 9733/10000 (97%)

97.33
[5 ,  300] loss: 0.094
[5 ,  600] loss: 0.094
[5 ,  900] loss: 0.093
[5 , 1200] loss

[32m[I 2021-04-28 13:53:41,540][0m Trial 42 finished with value: 97.84 and parameters: {'lr': 0.002796646616286064, 'l1': 436, 'l2': 179}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0021, Accuracy: 9784/10000 (98%)

97.84
363 153 0.0012489978575460975
[1 ,  300] loss: 1.419
[1 ,  600] loss: 0.486
[1 ,  900] loss: 0.387
[1 , 1200] loss: 0.347
[1 , 1500] loss: 0.320
[1 , 1800] loss: 0.319

Test set: Average loss: 0.0071, Accuracy: 9315/10000 (93%)

93.15
[2 ,  300] loss: 0.258
[2 ,  600] loss: 0.237
[2 ,  900] loss: 0.232
[2 , 1200] loss: 0.216
[2 , 1500] loss: 0.204
[2 , 1800] loss: 0.200

Test set: Average loss: 0.0046, Accuracy: 9541/10000 (95%)

95.41
[3 ,  300] loss: 0.176
[3 ,  600] loss: 0.182
[3 ,  900] loss: 0.155
[3 , 1200] loss: 0.159
[3 , 1500] loss: 0.153
[3 , 1800] loss: 0.153

Test set: Average loss: 0.0040, Accuracy: 9598/10000 (96%)

95.98
[4 ,  300] loss: 0.145
[4 ,  600] loss: 0.130
[4 ,  900] loss: 0.128
[4 , 1200] loss: 0.125
[4 , 1500] loss: 0.122
[4 , 1800] loss: 0.125

Test set: Average loss: 0.0035, Accuracy: 9645/10000 (96%)

96.45
[5 ,  300] loss: 0.121
[5 ,  600] loss: 0.111
[5 ,  900] loss: 0.115
[5 , 1200] los

[32m[I 2021-04-28 13:55:48,010][0m Trial 43 finished with value: 97.91 and parameters: {'lr': 0.0012489978575460975, 'l1': 363, 'l2': 153}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0022, Accuracy: 9791/10000 (98%)

97.91
475 249 0.004483986568765351
[1 ,  300] loss: 0.871
[1 ,  600] loss: 0.378
[1 ,  900] loss: 0.310
[1 , 1200] loss: 0.264
[1 , 1500] loss: 0.235
[1 , 1800] loss: 0.209

Test set: Average loss: 0.0050, Accuracy: 9519/10000 (95%)

95.19
[2 ,  300] loss: 0.182
[2 ,  600] loss: 0.174
[2 ,  900] loss: 0.161
[2 , 1200] loss: 0.177
[2 , 1500] loss: 0.175
[2 , 1800] loss: 0.145

Test set: Average loss: 0.0038, Accuracy: 9628/10000 (96%)

96.28
[3 ,  300] loss: 0.144
[3 ,  600] loss: 0.137
[3 ,  900] loss: 0.130
[3 , 1200] loss: 0.126
[3 , 1500] loss: 0.127
[3 , 1800] loss: 0.131

Test set: Average loss: 0.0033, Accuracy: 9664/10000 (97%)

96.64
[4 ,  300] loss: 0.102
[4 ,  600] loss: 0.105
[4 ,  900] loss: 0.112
[4 , 1200] loss: 0.107
[4 , 1500] loss: 0.106
[4 , 1800] loss: 0.109

Test set: Average loss: 0.0031, Accuracy: 9690/10000 (97%)

96.9
[5 ,  300] loss: 0.094
[5 ,  600] loss: 0.087
[5 ,  900] loss: 0.095
[5 , 1200] loss:

[32m[I 2021-04-28 13:57:53,378][0m Trial 44 finished with value: 97.79 and parameters: {'lr': 0.004483986568765351, 'l1': 475, 'l2': 249}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0022, Accuracy: 9779/10000 (98%)

97.79
126 319 0.002009481306306368
[1 ,  300] loss: 1.156
[1 ,  600] loss: 0.440
[1 ,  900] loss: 0.386
[1 , 1200] loss: 0.329
[1 , 1500] loss: 0.290
[1 , 1800] loss: 0.269

Test set: Average loss: 0.0069, Accuracy: 9319/10000 (93%)

93.19
[2 ,  300] loss: 0.248
[2 ,  600] loss: 0.220
[2 ,  900] loss: 0.222
[2 , 1200] loss: 0.213
[2 , 1500] loss: 0.204
[2 , 1800] loss: 0.186

Test set: Average loss: 0.0046, Accuracy: 9532/10000 (95%)

95.32
[3 ,  300] loss: 0.165
[3 ,  600] loss: 0.181
[3 ,  900] loss: 0.185
[3 , 1200] loss: 0.164
[3 , 1500] loss: 0.154
[3 , 1800] loss: 0.156

Test set: Average loss: 0.0037, Accuracy: 9631/10000 (96%)

96.31
[4 ,  300] loss: 0.150
[4 ,  600] loss: 0.134
[4 ,  900] loss: 0.138
[4 , 1200] loss: 0.134
[4 , 1500] loss: 0.146
[4 , 1800] loss: 0.136

Test set: Average loss: 0.0039, Accuracy: 9621/10000 (96%)

96.21
[5 ,  300] loss: 0.130
[5 ,  600] loss: 0.128
[5 ,  900] loss: 0.114
[5 , 1200] loss

[32m[I 2021-04-28 13:59:59,220][0m Trial 45 finished with value: 97.5 and parameters: {'lr': 0.002009481306306368, 'l1': 126, 'l2': 319}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0025, Accuracy: 9750/10000 (98%)

97.5
321 218 0.016411802782099798
[1 ,  300] loss: 0.818
[1 ,  600] loss: 0.592
[1 ,  900] loss: 0.545
[1 , 1200] loss: 0.507
[1 , 1500] loss: 0.529
[1 , 1800] loss: 0.506

Test set: Average loss: 0.0103, Accuracy: 8995/10000 (90%)

89.95
[2 ,  300] loss: 0.459
[2 ,  600] loss: 0.439
[2 ,  900] loss: 0.432
[2 , 1200] loss: 0.423
[2 , 1500] loss: 0.455
[2 , 1800] loss: 0.416

Test set: Average loss: 0.0095, Accuracy: 9115/10000 (91%)

91.15
[3 ,  300] loss: 0.394
[3 ,  600] loss: 0.400
[3 ,  900] loss: 0.375
[3 , 1200] loss: 0.348
[3 , 1500] loss: 0.356
[3 , 1800] loss: 0.352

Test set: Average loss: 0.0073, Accuracy: 9360/10000 (94%)

93.6
[4 ,  300] loss: 0.350
[4 ,  600] loss: 0.343
[4 ,  900] loss: 0.341
[4 , 1200] loss: 0.342
[4 , 1500] loss: 0.364
[4 , 1800] loss: 0.331

Test set: Average loss: 0.0068, Accuracy: 9357/10000 (94%)

93.57
[5 ,  300] loss: 0.312
[5 ,  600] loss: 0.325
[5 ,  900] loss: 0.321
[5 , 1200] loss: 

[32m[I 2021-04-28 14:02:04,962][0m Trial 46 finished with value: 94.18 and parameters: {'lr': 0.016411802782099798, 'l1': 321, 'l2': 218}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0071, Accuracy: 9418/10000 (94%)

94.18
376 264 0.007286190398901614
[1 ,  300] loss: 0.791
[1 ,  600] loss: 0.400
[1 ,  900] loss: 0.362
[1 , 1200] loss: 0.297
[1 , 1500] loss: 0.256
[1 , 1800] loss: 0.246

Test set: Average loss: 0.0058, Accuracy: 9417/10000 (94%)

94.17
[2 ,  300] loss: 0.220
[2 ,  600] loss: 0.201
[2 ,  900] loss: 0.194
[2 , 1200] loss: 0.198
[2 , 1500] loss: 0.188
[2 , 1800] loss: 0.186

Test set: Average loss: 0.0049, Accuracy: 9487/10000 (95%)

94.87
[3 ,  300] loss: 0.170
[3 ,  600] loss: 0.150
[3 ,  900] loss: 0.156
[3 , 1200] loss: 0.152
[3 , 1500] loss: 0.146
[3 , 1800] loss: 0.141

Test set: Average loss: 0.0035, Accuracy: 9657/10000 (97%)

96.57
[4 ,  300] loss: 0.133
[4 ,  600] loss: 0.125
[4 ,  900] loss: 0.131
[4 , 1200] loss: 0.122
[4 , 1500] loss: 0.129
[4 , 1800] loss: 0.127

Test set: Average loss: 0.0031, Accuracy: 9695/10000 (97%)

96.95
[5 ,  300] loss: 0.115
[5 ,  600] loss: 0.101
[5 ,  900] loss: 0.111
[5 , 1200] loss

[32m[I 2021-04-28 14:04:12,111][0m Trial 47 finished with value: 97.33 and parameters: {'lr': 0.007286190398901614, 'l1': 376, 'l2': 264}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0028, Accuracy: 9733/10000 (97%)

97.33
284 208 0.0017141888000004888
[1 ,  300] loss: 1.234
[1 ,  600] loss: 0.463
[1 ,  900] loss: 0.373
[1 , 1200] loss: 0.325
[1 , 1500] loss: 0.302
[1 , 1800] loss: 0.276

Test set: Average loss: 0.0067, Accuracy: 9360/10000 (94%)

93.6
[2 ,  300] loss: 0.238
[2 ,  600] loss: 0.215
[2 ,  900] loss: 0.204
[2 , 1200] loss: 0.206
[2 , 1500] loss: 0.189
[2 , 1800] loss: 0.178

Test set: Average loss: 0.0043, Accuracy: 9590/10000 (96%)

95.9
[3 ,  300] loss: 0.158
[3 ,  600] loss: 0.158
[3 ,  900] loss: 0.157
[3 , 1200] loss: 0.144
[3 , 1500] loss: 0.148
[3 , 1800] loss: 0.135

Test set: Average loss: 0.0035, Accuracy: 9643/10000 (96%)

96.43
[4 ,  300] loss: 0.135
[4 ,  600] loss: 0.124
[4 ,  900] loss: 0.122
[4 , 1200] loss: 0.115
[4 , 1500] loss: 0.114
[4 , 1800] loss: 0.125

Test set: Average loss: 0.0035, Accuracy: 9654/10000 (97%)

96.54
[5 ,  300] loss: 0.108
[5 ,  600] loss: 0.103
[5 ,  900] loss: 0.116
[5 , 1200] loss:

[32m[I 2021-04-28 14:06:16,651][0m Trial 48 finished with value: 97.77 and parameters: {'lr': 0.0017141888000004888, 'l1': 284, 'l2': 208}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0021, Accuracy: 9777/10000 (98%)

97.77
429 244 0.0003822955275774012
[1 ,  300] loss: 2.054
[1 ,  600] loss: 1.097
[1 ,  900] loss: 0.641
[1 , 1200] loss: 0.514
[1 , 1500] loss: 0.445
[1 , 1800] loss: 0.426

Test set: Average loss: 0.0110, Accuracy: 8991/10000 (90%)

89.91
[2 ,  300] loss: 0.395
[2 ,  600] loss: 0.355
[2 ,  900] loss: 0.351
[2 , 1200] loss: 0.349
[2 , 1500] loss: 0.320
[2 , 1800] loss: 0.315

Test set: Average loss: 0.0086, Accuracy: 9201/10000 (92%)

92.01
[3 ,  300] loss: 0.290
[3 ,  600] loss: 0.297
[3 ,  900] loss: 0.282
[3 , 1200] loss: 0.262
[3 , 1500] loss: 0.266
[3 , 1800] loss: 0.270

Test set: Average loss: 0.0072, Accuracy: 9334/10000 (93%)

93.34
[4 ,  300] loss: 0.239
[4 ,  600] loss: 0.238
[4 ,  900] loss: 0.236
[4 , 1200] loss: 0.238
[4 , 1500] loss: 0.224
[4 , 1800] loss: 0.226

Test set: Average loss: 0.0061, Accuracy: 9426/10000 (94%)

94.26
[5 ,  300] loss: 0.211
[5 ,  600] loss: 0.210
[5 ,  900] loss: 0.194
[5 , 1200] los

[32m[I 2021-04-28 14:08:24,279][0m Trial 49 finished with value: 96.75 and parameters: {'lr': 0.0003822955275774012, 'l1': 429, 'l2': 244}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0034, Accuracy: 9675/10000 (97%)

96.75
224 105 0.0007836629235863644
[1 ,  300] loss: 1.794
[1 ,  600] loss: 0.665
[1 ,  900] loss: 0.472
[1 , 1200] loss: 0.424
[1 , 1500] loss: 0.371
[1 , 1800] loss: 0.338

Test set: Average loss: 0.0089, Accuracy: 9191/10000 (92%)

91.91
[2 ,  300] loss: 0.318
[2 ,  600] loss: 0.292
[2 ,  900] loss: 0.308
[2 , 1200] loss: 0.278
[2 , 1500] loss: 0.252
[2 , 1800] loss: 0.252

Test set: Average loss: 0.0064, Accuracy: 9363/10000 (94%)

93.63
[3 ,  300] loss: 0.225
[3 ,  600] loss: 0.230
[3 ,  900] loss: 0.220
[3 , 1200] loss: 0.200
[3 , 1500] loss: 0.195
[3 , 1800] loss: 0.204

Test set: Average loss: 0.0052, Accuracy: 9506/10000 (95%)

95.06
[4 ,  300] loss: 0.174
[4 ,  600] loss: 0.182
[4 ,  900] loss: 0.169
[4 , 1200] loss: 0.169
[4 , 1500] loss: 0.173
[4 , 1800] loss: 0.173

Test set: Average loss: 0.0044, Accuracy: 9574/10000 (96%)

95.74
[5 ,  300] loss: 0.166
[5 ,  600] loss: 0.148
[5 ,  900] loss: 0.147
[5 , 1200] los

[32m[I 2021-04-28 14:10:30,495][0m Trial 50 finished with value: 97.42 and parameters: {'lr': 0.0007836629235863644, 'l1': 224, 'l2': 105}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0027, Accuracy: 9742/10000 (97%)

97.42
410 229 0.004916308401562389
[1 ,  300] loss: 0.834
[1 ,  600] loss: 0.392
[1 ,  900] loss: 0.321
[1 , 1200] loss: 0.291
[1 , 1500] loss: 0.238
[1 , 1800] loss: 0.213

Test set: Average loss: 0.0047, Accuracy: 9513/10000 (95%)

95.13
[2 ,  300] loss: 0.183
[2 ,  600] loss: 0.197
[2 ,  900] loss: 0.191
[2 , 1200] loss: 0.173
[2 , 1500] loss: 0.156
[2 , 1800] loss: 0.149

Test set: Average loss: 0.0036, Accuracy: 9634/10000 (96%)

96.34
[3 ,  300] loss: 0.142
[3 ,  600] loss: 0.133
[3 ,  900] loss: 0.137
[3 , 1200] loss: 0.143
[3 , 1500] loss: 0.135
[3 , 1800] loss: 0.134

Test set: Average loss: 0.0034, Accuracy: 9658/10000 (97%)

96.58
[4 ,  300] loss: 0.118
[4 ,  600] loss: 0.124
[4 ,  900] loss: 0.112
[4 , 1200] loss: 0.115
[4 , 1500] loss: 0.106
[4 , 1800] loss: 0.113

Test set: Average loss: 0.0046, Accuracy: 9510/10000 (95%)

95.1
[5 ,  300] loss: 0.111
[5 ,  600] loss: 0.094
[5 ,  900] loss: 0.092
[5 , 1200] loss:

[32m[I 2021-04-28 14:12:35,656][0m Trial 51 finished with value: 97.95 and parameters: {'lr': 0.004916308401562389, 'l1': 410, 'l2': 229}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0022, Accuracy: 9795/10000 (98%)

97.95
484 175 0.0035427937971075915
[1 ,  300] loss: 0.895
[1 ,  600] loss: 0.380
[1 ,  900] loss: 0.316
[1 , 1200] loss: 0.279
[1 , 1500] loss: 0.240
[1 , 1800] loss: 0.228

Test set: Average loss: 0.0049, Accuracy: 9524/10000 (95%)

95.24
[2 ,  300] loss: 0.192
[2 ,  600] loss: 0.184
[2 ,  900] loss: 0.169
[2 , 1200] loss: 0.161
[2 , 1500] loss: 0.166
[2 , 1800] loss: 0.156

Test set: Average loss: 0.0039, Accuracy: 9609/10000 (96%)

96.09
[3 ,  300] loss: 0.145
[3 ,  600] loss: 0.127
[3 ,  900] loss: 0.132
[3 , 1200] loss: 0.123
[3 , 1500] loss: 0.135
[3 , 1800] loss: 0.126

Test set: Average loss: 0.0035, Accuracy: 9655/10000 (97%)

96.55
[4 ,  300] loss: 0.107
[4 ,  600] loss: 0.111
[4 ,  900] loss: 0.105
[4 , 1200] loss: 0.101
[4 , 1500] loss: 0.098
[4 , 1800] loss: 0.102

Test set: Average loss: 0.0027, Accuracy: 9729/10000 (97%)

97.29
[5 ,  300] loss: 0.088
[5 ,  600] loss: 0.094
[5 ,  900] loss: 0.096
[5 , 1200] los

[32m[I 2021-04-28 14:14:40,133][0m Trial 52 finished with value: 97.7 and parameters: {'lr': 0.0035427937971075915, 'l1': 484, 'l2': 175}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0022, Accuracy: 9770/10000 (98%)

97.7
409 274 0.005202966146025217
[1 ,  300] loss: 0.860
[1 ,  600] loss: 0.389
[1 ,  900] loss: 0.296
[1 , 1200] loss: 0.276
[1 , 1500] loss: 0.239
[1 , 1800] loss: 0.214

Test set: Average loss: 0.0051, Accuracy: 9486/10000 (95%)

94.86
[2 ,  300] loss: 0.184
[2 ,  600] loss: 0.197
[2 ,  900] loss: 0.180
[2 , 1200] loss: 0.176
[2 , 1500] loss: 0.172
[2 , 1800] loss: 0.152

Test set: Average loss: 0.0041, Accuracy: 9566/10000 (96%)

95.66
[3 ,  300] loss: 0.139
[3 ,  600] loss: 0.139
[3 ,  900] loss: 0.138
[3 , 1200] loss: 0.137
[3 , 1500] loss: 0.141
[3 , 1800] loss: 0.129

Test set: Average loss: 0.0032, Accuracy: 9694/10000 (97%)

96.94
[4 ,  300] loss: 0.111
[4 ,  600] loss: 0.117
[4 ,  900] loss: 0.112
[4 , 1200] loss: 0.111
[4 , 1500] loss: 0.114
[4 , 1800] loss: 0.107

Test set: Average loss: 0.0034, Accuracy: 9642/10000 (96%)

96.42
[5 ,  300] loss: 0.089
[5 ,  600] loss: 0.099
[5 ,  900] loss: 0.089
[5 , 1200] loss:

[32m[I 2021-04-28 14:16:45,214][0m Trial 53 finished with value: 98.03 and parameters: {'lr': 0.005202966146025217, 'l1': 409, 'l2': 274}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0022, Accuracy: 9803/10000 (98%)

98.03
354 302 0.00875812513745206
[1 ,  300] loss: 0.787
[1 ,  600] loss: 0.398
[1 ,  900] loss: 0.362
[1 , 1200] loss: 0.310
[1 , 1500] loss: 0.293
[1 , 1800] loss: 0.272

Test set: Average loss: 0.0060, Accuracy: 9405/10000 (94%)

94.05
[2 ,  300] loss: 0.230
[2 ,  600] loss: 0.242
[2 ,  900] loss: 0.233
[2 , 1200] loss: 0.223
[2 , 1500] loss: 0.195
[2 , 1800] loss: 0.197

Test set: Average loss: 0.0047, Accuracy: 9533/10000 (95%)

95.33
[3 ,  300] loss: 0.178
[3 ,  600] loss: 0.177
[3 ,  900] loss: 0.177
[3 , 1200] loss: 0.188
[3 , 1500] loss: 0.166
[3 , 1800] loss: 0.163

Test set: Average loss: 0.0038, Accuracy: 9625/10000 (96%)

96.25
[4 ,  300] loss: 0.135
[4 ,  600] loss: 0.146
[4 ,  900] loss: 0.160
[4 , 1200] loss: 0.147
[4 , 1500] loss: 0.129
[4 , 1800] loss: 0.151

Test set: Average loss: 0.0034, Accuracy: 9678/10000 (97%)

96.78
[5 ,  300] loss: 0.122
[5 ,  600] loss: 0.137
[5 ,  900] loss: 0.127
[5 , 1200] loss:

[32m[I 2021-04-28 14:18:49,552][0m Trial 54 finished with value: 97.14 and parameters: {'lr': 0.00875812513745206, 'l1': 354, 'l2': 302}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0031, Accuracy: 9714/10000 (97%)

97.14
446 339 0.005441092982597947
[1 ,  300] loss: 0.845
[1 ,  600] loss: 0.376
[1 ,  900] loss: 0.334
[1 , 1200] loss: 0.280
[1 , 1500] loss: 0.217
[1 , 1800] loss: 0.220

Test set: Average loss: 0.0054, Accuracy: 9475/10000 (95%)

94.75
[2 ,  300] loss: 0.183
[2 ,  600] loss: 0.186
[2 ,  900] loss: 0.179
[2 , 1200] loss: 0.182
[2 , 1500] loss: 0.169
[2 , 1800] loss: 0.177

Test set: Average loss: 0.0039, Accuracy: 9593/10000 (96%)

95.93
[3 ,  300] loss: 0.143
[3 ,  600] loss: 0.133
[3 ,  900] loss: 0.139
[3 , 1200] loss: 0.129
[3 , 1500] loss: 0.134
[3 , 1800] loss: 0.120

Test set: Average loss: 0.0031, Accuracy: 9697/10000 (97%)

96.97
[4 ,  300] loss: 0.118
[4 ,  600] loss: 0.114
[4 ,  900] loss: 0.114
[4 , 1200] loss: 0.100
[4 , 1500] loss: 0.120
[4 , 1800] loss: 0.098

Test set: Average loss: 0.0028, Accuracy: 9731/10000 (97%)

97.31
[5 ,  300] loss: 0.088
[5 ,  600] loss: 0.092
[5 ,  900] loss: 0.100
[5 , 1200] loss

[32m[I 2021-04-28 14:20:53,242][0m Trial 55 finished with value: 97.75 and parameters: {'lr': 0.005441092982597947, 'l1': 446, 'l2': 339}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0022, Accuracy: 9775/10000 (98%)

97.75
384 279 0.004816032085451804
[1 ,  300] loss: 0.852
[1 ,  600] loss: 0.384
[1 ,  900] loss: 0.319
[1 , 1200] loss: 0.276
[1 , 1500] loss: 0.242
[1 , 1800] loss: 0.222

Test set: Average loss: 0.0054, Accuracy: 9440/10000 (94%)

94.4
[2 ,  300] loss: 0.194
[2 ,  600] loss: 0.192
[2 ,  900] loss: 0.180
[2 , 1200] loss: 0.173
[2 , 1500] loss: 0.158
[2 , 1800] loss: 0.166

Test set: Average loss: 0.0039, Accuracy: 9603/10000 (96%)

96.03
[3 ,  300] loss: 0.142
[3 ,  600] loss: 0.133
[3 ,  900] loss: 0.127
[3 , 1200] loss: 0.131
[3 , 1500] loss: 0.135
[3 , 1800] loss: 0.134

Test set: Average loss: 0.0034, Accuracy: 9652/10000 (97%)

96.52
[4 ,  300] loss: 0.125
[4 ,  600] loss: 0.106
[4 ,  900] loss: 0.106
[4 , 1200] loss: 0.111
[4 , 1500] loss: 0.112
[4 , 1800] loss: 0.117

Test set: Average loss: 0.0028, Accuracy: 9717/10000 (97%)

97.17
[5 ,  300] loss: 0.103
[5 ,  600] loss: 0.096
[5 ,  900] loss: 0.100
[5 , 1200] loss:

[32m[I 2021-04-28 14:22:58,584][0m Trial 56 finished with value: 97.87 and parameters: {'lr': 0.004816032085451804, 'l1': 384, 'l2': 279}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0022, Accuracy: 9787/10000 (98%)

97.87
330 261 0.003375883139053408
[1 ,  300] loss: 0.941
[1 ,  600] loss: 0.401
[1 ,  900] loss: 0.337
[1 , 1200] loss: 0.270
[1 , 1500] loss: 0.259
[1 , 1800] loss: 0.232

Test set: Average loss: 0.0058, Accuracy: 9411/10000 (94%)

94.11
[2 ,  300] loss: 0.200
[2 ,  600] loss: 0.181
[2 ,  900] loss: 0.177
[2 , 1200] loss: 0.181
[2 , 1500] loss: 0.176
[2 , 1800] loss: 0.155

Test set: Average loss: 0.0041, Accuracy: 9585/10000 (96%)

95.85
[3 ,  300] loss: 0.146
[3 ,  600] loss: 0.137
[3 ,  900] loss: 0.138
[3 , 1200] loss: 0.138
[3 , 1500] loss: 0.134
[3 , 1800] loss: 0.141

Test set: Average loss: 0.0037, Accuracy: 9618/10000 (96%)

96.18
[4 ,  300] loss: 0.115
[4 ,  600] loss: 0.112
[4 ,  900] loss: 0.113
[4 , 1200] loss: 0.106
[4 , 1500] loss: 0.107
[4 , 1800] loss: 0.115

Test set: Average loss: 0.0030, Accuracy: 9704/10000 (97%)

97.04
[5 ,  300] loss: 0.100
[5 ,  600] loss: 0.098
[5 ,  900] loss: 0.100
[5 , 1200] loss

[32m[I 2021-04-28 14:25:00,790][0m Trial 57 finished with value: 97.83 and parameters: {'lr': 0.003375883139053408, 'l1': 330, 'l2': 261}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0023, Accuracy: 9783/10000 (98%)

97.83
414 198 0.010008219587122795
[1 ,  300] loss: 0.787
[1 ,  600] loss: 0.413
[1 ,  900] loss: 0.367
[1 , 1200] loss: 0.333
[1 , 1500] loss: 0.310
[1 , 1800] loss: 0.291

Test set: Average loss: 0.0066, Accuracy: 9327/10000 (93%)

93.27
[2 ,  300] loss: 0.256
[2 ,  600] loss: 0.242
[2 ,  900] loss: 0.249
[2 , 1200] loss: 0.232
[2 , 1500] loss: 0.213
[2 , 1800] loss: 0.213

Test set: Average loss: 0.0053, Accuracy: 9509/10000 (95%)

95.09
[3 ,  300] loss: 0.182
[3 ,  600] loss: 0.198
[3 ,  900] loss: 0.196
[3 , 1200] loss: 0.184
[3 , 1500] loss: 0.176
[3 , 1800] loss: 0.176

Test set: Average loss: 0.0043, Accuracy: 9565/10000 (96%)

95.65
[4 ,  300] loss: 0.163
[4 ,  600] loss: 0.163
[4 ,  900] loss: 0.167
[4 , 1200] loss: 0.162
[4 , 1500] loss: 0.155
[4 , 1800] loss: 0.158

Test set: Average loss: 0.0046, Accuracy: 9544/10000 (95%)

95.44
[5 ,  300] loss: 0.147
[5 ,  600] loss: 0.125
[5 ,  900] loss: 0.153
[5 , 1200] loss

[32m[I 2021-04-28 14:27:03,241][0m Trial 58 finished with value: 97.12 and parameters: {'lr': 0.010008219587122795, 'l1': 414, 'l2': 198}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0031, Accuracy: 9712/10000 (97%)

97.12
462 234 0.018554783173199797
[1 ,  300] loss: 0.847
[1 ,  600] loss: 0.632
[1 ,  900] loss: 0.600
[1 , 1200] loss: 0.585
[1 , 1500] loss: 0.527
[1 , 1800] loss: 0.490

Test set: Average loss: 0.0109, Accuracy: 8852/10000 (89%)

88.52
[2 ,  300] loss: 0.455
[2 ,  600] loss: 0.487
[2 ,  900] loss: 0.486
[2 , 1200] loss: 0.466
[2 , 1500] loss: 0.424
[2 , 1800] loss: 0.434

Test set: Average loss: 0.0092, Accuracy: 9190/10000 (92%)

91.9
[3 ,  300] loss: 0.389
[3 ,  600] loss: 0.417
[3 ,  900] loss: 0.389
[3 , 1200] loss: 0.417
[3 , 1500] loss: 0.428
[3 , 1800] loss: 0.450

Test set: Average loss: 0.0094, Accuracy: 9183/10000 (92%)

91.83
[4 ,  300] loss: 0.408
[4 ,  600] loss: 0.404
[4 ,  900] loss: 0.448
[4 , 1200] loss: 0.393
[4 , 1500] loss: 0.415
[4 , 1800] loss: 0.431

Test set: Average loss: 0.0096, Accuracy: 9168/10000 (92%)

91.68
[5 ,  300] loss: 0.403
[5 ,  600] loss: 0.408
[5 ,  900] loss: 0.426
[5 , 1200] loss:

[32m[I 2021-04-28 14:29:08,166][0m Trial 59 finished with value: 93.55 and parameters: {'lr': 0.018554783173199797, 'l1': 462, 'l2': 234}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0077, Accuracy: 9355/10000 (94%)

93.55
509 318 0.0015755582504935373
[1 ,  300] loss: 1.160
[1 ,  600] loss: 0.449
[1 ,  900] loss: 0.355
[1 , 1200] loss: 0.319
[1 , 1500] loss: 0.296
[1 , 1800] loss: 0.265

Test set: Average loss: 0.0072, Accuracy: 9297/10000 (93%)

92.97
[2 ,  300] loss: 0.214
[2 ,  600] loss: 0.215
[2 ,  900] loss: 0.208
[2 , 1200] loss: 0.190
[2 , 1500] loss: 0.180
[2 , 1800] loss: 0.177

Test set: Average loss: 0.0046, Accuracy: 9569/10000 (96%)

95.69
[3 ,  300] loss: 0.142
[3 ,  600] loss: 0.153
[3 ,  900] loss: 0.152
[3 , 1200] loss: 0.148
[3 , 1500] loss: 0.142
[3 , 1800] loss: 0.127

Test set: Average loss: 0.0037, Accuracy: 9618/10000 (96%)

96.18
[4 ,  300] loss: 0.121
[4 ,  600] loss: 0.116
[4 ,  900] loss: 0.111
[4 , 1200] loss: 0.119
[4 , 1500] loss: 0.104
[4 , 1800] loss: 0.108

Test set: Average loss: 0.0031, Accuracy: 9691/10000 (97%)

96.91
[5 ,  300] loss: 0.091
[5 ,  600] loss: 0.105
[5 ,  900] loss: 0.097
[5 , 1200] los

[32m[I 2021-04-28 14:31:14,394][0m Trial 60 finished with value: 97.88 and parameters: {'lr': 0.0015755582504935373, 'l1': 509, 'l2': 318}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0021, Accuracy: 9788/10000 (98%)

97.88
397 224 0.002468961136104104
[1 ,  300] loss: 0.995
[1 ,  600] loss: 0.406
[1 ,  900] loss: 0.335
[1 , 1200] loss: 0.290
[1 , 1500] loss: 0.265
[1 , 1800] loss: 0.260

Test set: Average loss: 0.0056, Accuracy: 9431/10000 (94%)

94.31
[2 ,  300] loss: 0.205
[2 ,  600] loss: 0.195
[2 ,  900] loss: 0.171
[2 , 1200] loss: 0.171
[2 , 1500] loss: 0.167
[2 , 1800] loss: 0.161

Test set: Average loss: 0.0041, Accuracy: 9581/10000 (96%)

95.81
[3 ,  300] loss: 0.145
[3 ,  600] loss: 0.126
[3 ,  900] loss: 0.130
[3 , 1200] loss: 0.136
[3 , 1500] loss: 0.133
[3 , 1800] loss: 0.129

Test set: Average loss: 0.0031, Accuracy: 9677/10000 (97%)

96.77
[4 ,  300] loss: 0.108
[4 ,  600] loss: 0.101
[4 ,  900] loss: 0.114
[4 , 1200] loss: 0.112
[4 , 1500] loss: 0.109
[4 , 1800] loss: 0.109

Test set: Average loss: 0.0030, Accuracy: 9707/10000 (97%)

97.07
[5 ,  300] loss: 0.098
[5 ,  600] loss: 0.093
[5 ,  900] loss: 0.099
[5 , 1200] loss

[32m[I 2021-04-28 14:33:20,926][0m Trial 61 finished with value: 97.82 and parameters: {'lr': 0.002468961136104104, 'l1': 397, 'l2': 224}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0021, Accuracy: 9782/10000 (98%)

97.82
441 281 0.0030704428800988823
[1 ,  300] loss: 0.948
[1 ,  600] loss: 0.405
[1 ,  900] loss: 0.309
[1 , 1200] loss: 0.272
[1 , 1500] loss: 0.246
[1 , 1800] loss: 0.223

Test set: Average loss: 0.0053, Accuracy: 9472/10000 (95%)

94.72
[2 ,  300] loss: 0.200
[2 ,  600] loss: 0.187
[2 ,  900] loss: 0.168
[2 , 1200] loss: 0.172
[2 , 1500] loss: 0.158
[2 , 1800] loss: 0.151

Test set: Average loss: 0.0038, Accuracy: 9590/10000 (96%)

95.9
[3 ,  300] loss: 0.139
[3 ,  600] loss: 0.121
[3 ,  900] loss: 0.135
[3 , 1200] loss: 0.132
[3 , 1500] loss: 0.125
[3 , 1800] loss: 0.119

Test set: Average loss: 0.0033, Accuracy: 9659/10000 (97%)

96.59
[4 ,  300] loss: 0.104
[4 ,  600] loss: 0.107
[4 ,  900] loss: 0.102
[4 , 1200] loss: 0.102
[4 , 1500] loss: 0.104
[4 , 1800] loss: 0.106

Test set: Average loss: 0.0026, Accuracy: 9743/10000 (97%)

97.43
[5 ,  300] loss: 0.092
[5 ,  600] loss: 0.094
[5 ,  900] loss: 0.094
[5 , 1200] loss

[32m[I 2021-04-28 14:35:28,762][0m Trial 62 finished with value: 97.65 and parameters: {'lr': 0.0030704428800988823, 'l1': 441, 'l2': 281}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0023, Accuracy: 9765/10000 (98%)

97.65
390 253 0.005946484659901519
[1 ,  300] loss: 0.829
[1 ,  600] loss: 0.361
[1 ,  900] loss: 0.316
[1 , 1200] loss: 0.274
[1 , 1500] loss: 0.242
[1 , 1800] loss: 0.231

Test set: Average loss: 0.0062, Accuracy: 9364/10000 (94%)

93.64
[2 ,  300] loss: 0.197
[2 ,  600] loss: 0.207
[2 ,  900] loss: 0.195
[2 , 1200] loss: 0.169
[2 , 1500] loss: 0.182
[2 , 1800] loss: 0.166

Test set: Average loss: 0.0046, Accuracy: 9523/10000 (95%)

95.23
[3 ,  300] loss: 0.144
[3 ,  600] loss: 0.160
[3 ,  900] loss: 0.137
[3 , 1200] loss: 0.133
[3 , 1500] loss: 0.128
[3 , 1800] loss: 0.155

Test set: Average loss: 0.0033, Accuracy: 9670/10000 (97%)

96.7
[4 ,  300] loss: 0.116
[4 ,  600] loss: 0.118
[4 ,  900] loss: 0.128
[4 , 1200] loss: 0.119
[4 , 1500] loss: 0.122
[4 , 1800] loss: 0.114

Test set: Average loss: 0.0038, Accuracy: 9622/10000 (96%)

96.22
[5 ,  300] loss: 0.092
[5 ,  600] loss: 0.101
[5 ,  900] loss: 0.109
[5 , 1200] loss:

[32m[I 2021-04-28 14:37:38,359][0m Trial 63 finished with value: 97.9 and parameters: {'lr': 0.005946484659901519, 'l1': 390, 'l2': 253}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0021, Accuracy: 9790/10000 (98%)

97.9
409 207 0.00109404052498349
[1 ,  300] loss: 1.381
[1 ,  600] loss: 0.498
[1 ,  900] loss: 0.409
[1 , 1200] loss: 0.365
[1 , 1500] loss: 0.314
[1 , 1800] loss: 0.314

Test set: Average loss: 0.0080, Accuracy: 9224/10000 (92%)

92.24
[2 ,  300] loss: 0.276
[2 ,  600] loss: 0.247
[2 ,  900] loss: 0.238
[2 , 1200] loss: 0.233
[2 , 1500] loss: 0.197
[2 , 1800] loss: 0.204

Test set: Average loss: 0.0053, Accuracy: 9495/10000 (95%)

94.95
[3 ,  300] loss: 0.193
[3 ,  600] loss: 0.177
[3 ,  900] loss: 0.177
[3 , 1200] loss: 0.165
[3 , 1500] loss: 0.163
[3 , 1800] loss: 0.153

Test set: Average loss: 0.0044, Accuracy: 9547/10000 (95%)

95.47
[4 ,  300] loss: 0.147
[4 ,  600] loss: 0.137
[4 ,  900] loss: 0.138
[4 , 1200] loss: 0.142
[4 , 1500] loss: 0.131
[4 , 1800] loss: 0.131

Test set: Average loss: 0.0036, Accuracy: 9655/10000 (97%)

96.55
[5 ,  300] loss: 0.121
[5 ,  600] loss: 0.110
[5 ,  900] loss: 0.118
[5 , 1200] loss: 

[32m[I 2021-04-28 14:39:50,362][0m Trial 64 finished with value: 97.66 and parameters: {'lr': 0.00109404052498349, 'l1': 409, 'l2': 207}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0023, Accuracy: 9766/10000 (98%)

97.66
364 231 0.0017613007757830634
[1 ,  300] loss: 1.155
[1 ,  600] loss: 0.431
[1 ,  900] loss: 0.371
[1 , 1200] loss: 0.305
[1 , 1500] loss: 0.288
[1 , 1800] loss: 0.271

Test set: Average loss: 0.0065, Accuracy: 9389/10000 (94%)

93.89
[2 ,  300] loss: 0.224
[2 ,  600] loss: 0.211
[2 ,  900] loss: 0.203
[2 , 1200] loss: 0.195
[2 , 1500] loss: 0.180
[2 , 1800] loss: 0.174

Test set: Average loss: 0.0045, Accuracy: 9564/10000 (96%)

95.64
[3 ,  300] loss: 0.159
[3 ,  600] loss: 0.158
[3 ,  900] loss: 0.135
[3 , 1200] loss: 0.145
[3 , 1500] loss: 0.139
[3 , 1800] loss: 0.138

Test set: Average loss: 0.0036, Accuracy: 9638/10000 (96%)

96.38
[4 ,  300] loss: 0.117
[4 ,  600] loss: 0.123
[4 ,  900] loss: 0.119
[4 , 1200] loss: 0.127
[4 , 1500] loss: 0.118
[4 , 1800] loss: 0.111

Test set: Average loss: 0.0030, Accuracy: 9701/10000 (97%)

97.01
[5 ,  300] loss: 0.096
[5 ,  600] loss: 0.103
[5 ,  900] loss: 0.099
[5 , 1200] los

[32m[I 2021-04-28 14:42:02,520][0m Trial 65 finished with value: 97.74 and parameters: {'lr': 0.0017613007757830634, 'l1': 364, 'l2': 231}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0023, Accuracy: 9774/10000 (98%)

97.74
294 190 0.0024808904258766576
[1 ,  300] loss: 1.043
[1 ,  600] loss: 0.415
[1 ,  900] loss: 0.340
[1 , 1200] loss: 0.294
[1 , 1500] loss: 0.271
[1 , 1800] loss: 0.249

Test set: Average loss: 0.0061, Accuracy: 9381/10000 (94%)

93.81
[2 ,  300] loss: 0.211
[2 ,  600] loss: 0.203
[2 ,  900] loss: 0.186
[2 , 1200] loss: 0.184
[2 , 1500] loss: 0.165
[2 , 1800] loss: 0.159

Test set: Average loss: 0.0046, Accuracy: 9542/10000 (95%)

95.42
[3 ,  300] loss: 0.143
[3 ,  600] loss: 0.145
[3 ,  900] loss: 0.134
[3 , 1200] loss: 0.161
[3 , 1500] loss: 0.141
[3 , 1800] loss: 0.132

Test set: Average loss: 0.0036, Accuracy: 9644/10000 (96%)

96.44
[4 ,  300] loss: 0.121
[4 ,  600] loss: 0.123
[4 ,  900] loss: 0.120
[4 , 1200] loss: 0.123
[4 , 1500] loss: 0.117
[4 , 1800] loss: 0.097

Test set: Average loss: 0.0032, Accuracy: 9686/10000 (97%)

96.86
[5 ,  300] loss: 0.103
[5 ,  600] loss: 0.103
[5 ,  900] loss: 0.098
[5 , 1200] los

[32m[I 2021-04-28 14:44:11,496][0m Trial 66 finished with value: 97.53 and parameters: {'lr': 0.0024808904258766576, 'l1': 294, 'l2': 190}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0026, Accuracy: 9753/10000 (98%)

97.53
424 301 0.004372279524722135
[1 ,  300] loss: 0.850
[1 ,  600] loss: 0.380
[1 ,  900] loss: 0.321
[1 , 1200] loss: 0.260
[1 , 1500] loss: 0.248
[1 , 1800] loss: 0.224

Test set: Average loss: 0.0055, Accuracy: 9427/10000 (94%)

94.27
[2 ,  300] loss: 0.193
[2 ,  600] loss: 0.189
[2 ,  900] loss: 0.156
[2 , 1200] loss: 0.169
[2 , 1500] loss: 0.152
[2 , 1800] loss: 0.162

Test set: Average loss: 0.0038, Accuracy: 9627/10000 (96%)

96.27
[3 ,  300] loss: 0.131
[3 ,  600] loss: 0.136
[3 ,  900] loss: 0.141
[3 , 1200] loss: 0.143
[3 , 1500] loss: 0.116
[3 , 1800] loss: 0.120

Test set: Average loss: 0.0033, Accuracy: 9677/10000 (97%)

96.77
[4 ,  300] loss: 0.110
[4 ,  600] loss: 0.106
[4 ,  900] loss: 0.111
[4 , 1200] loss: 0.110
[4 , 1500] loss: 0.101
[4 , 1800] loss: 0.105

Test set: Average loss: 0.0029, Accuracy: 9689/10000 (97%)

96.89
[5 ,  300] loss: 0.094
[5 ,  600] loss: 0.094
[5 ,  900] loss: 0.096
[5 , 1200] loss

[32m[I 2021-04-28 14:46:19,328][0m Trial 67 finished with value: 97.95 and parameters: {'lr': 0.004372279524722135, 'l1': 424, 'l2': 301}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0021, Accuracy: 9795/10000 (98%)

97.95
482 375 0.004737491652988563
[1 ,  300] loss: 0.825
[1 ,  600] loss: 0.375
[1 ,  900] loss: 0.295
[1 , 1200] loss: 0.266
[1 , 1500] loss: 0.230
[1 , 1800] loss: 0.228

Test set: Average loss: 0.0051, Accuracy: 9486/10000 (95%)

94.86
[2 ,  300] loss: 0.190
[2 ,  600] loss: 0.182
[2 ,  900] loss: 0.169
[2 , 1200] loss: 0.154
[2 , 1500] loss: 0.159
[2 , 1800] loss: 0.157

Test set: Average loss: 0.0043, Accuracy: 9557/10000 (96%)

95.57
[3 ,  300] loss: 0.124
[3 ,  600] loss: 0.146
[3 ,  900] loss: 0.139
[3 , 1200] loss: 0.120
[3 , 1500] loss: 0.122
[3 , 1800] loss: 0.116

Test set: Average loss: 0.0031, Accuracy: 9683/10000 (97%)

96.83
[4 ,  300] loss: 0.109
[4 ,  600] loss: 0.111
[4 ,  900] loss: 0.099
[4 , 1200] loss: 0.102
[4 , 1500] loss: 0.102
[4 , 1800] loss: 0.118

Test set: Average loss: 0.0031, Accuracy: 9685/10000 (97%)

96.85
[5 ,  300] loss: 0.094
[5 ,  600] loss: 0.082
[5 ,  900] loss: 0.089
[5 , 1200] loss

[32m[I 2021-04-28 14:48:27,629][0m Trial 68 finished with value: 97.61 and parameters: {'lr': 0.004737491652988563, 'l1': 482, 'l2': 375}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0024, Accuracy: 9761/10000 (98%)

97.61
457 304 0.006936526267083195
[1 ,  300] loss: 0.804
[1 ,  600] loss: 0.387
[1 ,  900] loss: 0.350
[1 , 1200] loss: 0.270
[1 , 1500] loss: 0.238
[1 , 1800] loss: 0.235

Test set: Average loss: 0.0051, Accuracy: 9484/10000 (95%)

94.84
[2 ,  300] loss: 0.206
[2 ,  600] loss: 0.187
[2 ,  900] loss: 0.178
[2 , 1200] loss: 0.176
[2 , 1500] loss: 0.192
[2 , 1800] loss: 0.172

Test set: Average loss: 0.0041, Accuracy: 9596/10000 (96%)

95.96
[3 ,  300] loss: 0.145
[3 ,  600] loss: 0.145
[3 ,  900] loss: 0.160
[3 , 1200] loss: 0.140
[3 , 1500] loss: 0.147
[3 , 1800] loss: 0.138

Test set: Average loss: 0.0034, Accuracy: 9640/10000 (96%)

96.4
[4 ,  300] loss: 0.122
[4 ,  600] loss: 0.129
[4 ,  900] loss: 0.130
[4 , 1200] loss: 0.118
[4 , 1500] loss: 0.124
[4 , 1800] loss: 0.130

Test set: Average loss: 0.0031, Accuracy: 9693/10000 (97%)

96.93
[5 ,  300] loss: 0.110
[5 ,  600] loss: 0.107
[5 ,  900] loss: 0.105
[5 , 1200] loss:

[32m[I 2021-04-28 14:50:36,808][0m Trial 69 finished with value: 97.59 and parameters: {'lr': 0.006936526267083195, 'l1': 457, 'l2': 304}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0024, Accuracy: 9759/10000 (98%)

97.59
427 277 0.0035179450457166203
[1 ,  300] loss: 0.922
[1 ,  600] loss: 0.389
[1 ,  900] loss: 0.296
[1 , 1200] loss: 0.265
[1 , 1500] loss: 0.242
[1 , 1800] loss: 0.225

Test set: Average loss: 0.0059, Accuracy: 9410/10000 (94%)

94.1
[2 ,  300] loss: 0.194
[2 ,  600] loss: 0.185
[2 ,  900] loss: 0.167
[2 , 1200] loss: 0.163
[2 , 1500] loss: 0.156
[2 , 1800] loss: 0.149

Test set: Average loss: 0.0040, Accuracy: 9586/10000 (96%)

95.86
[3 ,  300] loss: 0.137
[3 ,  600] loss: 0.127
[3 ,  900] loss: 0.142
[3 , 1200] loss: 0.121
[3 , 1500] loss: 0.123
[3 , 1800] loss: 0.114

Test set: Average loss: 0.0031, Accuracy: 9678/10000 (97%)

96.78
[4 ,  300] loss: 0.107
[4 ,  600] loss: 0.108
[4 ,  900] loss: 0.109
[4 , 1200] loss: 0.103
[4 , 1500] loss: 0.103
[4 , 1800] loss: 0.103

Test set: Average loss: 0.0031, Accuracy: 9685/10000 (97%)

96.85
[5 ,  300] loss: 0.091
[5 ,  600] loss: 0.091
[5 ,  900] loss: 0.084
[5 , 1200] loss

[32m[I 2021-04-28 14:52:45,534][0m Trial 70 finished with value: 97.88 and parameters: {'lr': 0.0035179450457166203, 'l1': 427, 'l2': 277}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0020, Accuracy: 9788/10000 (98%)

97.88
407 295 0.004149113468117577
[1 ,  300] loss: 0.889
[1 ,  600] loss: 0.368
[1 ,  900] loss: 0.300
[1 , 1200] loss: 0.266
[1 , 1500] loss: 0.247
[1 , 1800] loss: 0.234

Test set: Average loss: 0.0065, Accuracy: 9335/10000 (93%)

93.35
[2 ,  300] loss: 0.182
[2 ,  600] loss: 0.179
[2 ,  900] loss: 0.172
[2 , 1200] loss: 0.171
[2 , 1500] loss: 0.165
[2 , 1800] loss: 0.162

Test set: Average loss: 0.0043, Accuracy: 9566/10000 (96%)

95.66
[3 ,  300] loss: 0.138
[3 ,  600] loss: 0.132
[3 ,  900] loss: 0.137
[3 , 1200] loss: 0.134
[3 , 1500] loss: 0.124
[3 , 1800] loss: 0.127

Test set: Average loss: 0.0029, Accuracy: 9716/10000 (97%)

97.16
[4 ,  300] loss: 0.105
[4 ,  600] loss: 0.104
[4 ,  900] loss: 0.113
[4 , 1200] loss: 0.096
[4 , 1500] loss: 0.112
[4 , 1800] loss: 0.100

Test set: Average loss: 0.0031, Accuracy: 9683/10000 (97%)

96.83
[5 ,  300] loss: 0.095
[5 ,  600] loss: 0.098
[5 ,  900] loss: 0.087
[5 , 1200] loss

[32m[I 2021-04-28 14:54:55,590][0m Trial 71 finished with value: 97.92 and parameters: {'lr': 0.004149113468117577, 'l1': 407, 'l2': 295}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0021, Accuracy: 9792/10000 (98%)

97.92
380 264 0.0020715872608813176
[1 ,  300] loss: 1.119
[1 ,  600] loss: 0.419
[1 ,  900] loss: 0.359
[1 , 1200] loss: 0.305
[1 , 1500] loss: 0.254
[1 , 1800] loss: 0.251

Test set: Average loss: 0.0058, Accuracy: 9428/10000 (94%)

94.28
[2 ,  300] loss: 0.203
[2 ,  600] loss: 0.206
[2 ,  900] loss: 0.190
[2 , 1200] loss: 0.178
[2 , 1500] loss: 0.176
[2 , 1800] loss: 0.155

Test set: Average loss: 0.0041, Accuracy: 9595/10000 (96%)

95.95
[3 ,  300] loss: 0.139
[3 ,  600] loss: 0.149
[3 ,  900] loss: 0.140
[3 , 1200] loss: 0.134
[3 , 1500] loss: 0.133
[3 , 1800] loss: 0.126

Test set: Average loss: 0.0032, Accuracy: 9685/10000 (97%)

96.85
[4 ,  300] loss: 0.112
[4 ,  600] loss: 0.115
[4 ,  900] loss: 0.105
[4 , 1200] loss: 0.113
[4 , 1500] loss: 0.119
[4 , 1800] loss: 0.105

Test set: Average loss: 0.0030, Accuracy: 9702/10000 (97%)

97.02
[5 ,  300] loss: 0.097
[5 ,  600] loss: 0.094
[5 ,  900] loss: 0.101
[5 , 1200] los

[32m[I 2021-04-28 14:57:08,338][0m Trial 72 finished with value: 97.73 and parameters: {'lr': 0.0020715872608813176, 'l1': 380, 'l2': 264}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0023, Accuracy: 9773/10000 (98%)

97.73
347 142 0.002588201871059555
[1 ,  300] loss: 1.019
[1 ,  600] loss: 0.411
[1 ,  900] loss: 0.341
[1 , 1200] loss: 0.291
[1 , 1500] loss: 0.266
[1 , 1800] loss: 0.231

Test set: Average loss: 0.0063, Accuracy: 9410/10000 (94%)

94.1
[2 ,  300] loss: 0.203
[2 ,  600] loss: 0.186
[2 ,  900] loss: 0.184
[2 , 1200] loss: 0.181
[2 , 1500] loss: 0.180
[2 , 1800] loss: 0.166

Test set: Average loss: 0.0040, Accuracy: 9595/10000 (96%)

95.95
[3 ,  300] loss: 0.143
[3 ,  600] loss: 0.142
[3 ,  900] loss: 0.133
[3 , 1200] loss: 0.137
[3 , 1500] loss: 0.138
[3 , 1800] loss: 0.130

Test set: Average loss: 0.0035, Accuracy: 9632/10000 (96%)

96.32
[4 ,  300] loss: 0.118
[4 ,  600] loss: 0.102
[4 ,  900] loss: 0.116
[4 , 1200] loss: 0.121
[4 , 1500] loss: 0.115
[4 , 1800] loss: 0.111

Test set: Average loss: 0.0026, Accuracy: 9733/10000 (97%)

97.33
[5 ,  300] loss: 0.092
[5 ,  600] loss: 0.100
[5 ,  900] loss: 0.096
[5 , 1200] loss:

[32m[I 2021-04-28 14:59:20,485][0m Trial 73 finished with value: 97.91 and parameters: {'lr': 0.002588201871059555, 'l1': 347, 'l2': 142}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0022, Accuracy: 9791/10000 (98%)

97.91
331 240 0.001380379687866133
[1 ,  300] loss: 1.321
[1 ,  600] loss: 0.476
[1 ,  900] loss: 0.388
[1 , 1200] loss: 0.348
[1 , 1500] loss: 0.294
[1 , 1800] loss: 0.283

Test set: Average loss: 0.0072, Accuracy: 9329/10000 (93%)

93.29
[2 ,  300] loss: 0.249
[2 ,  600] loss: 0.232
[2 ,  900] loss: 0.224
[2 , 1200] loss: 0.204
[2 , 1500] loss: 0.201
[2 , 1800] loss: 0.192

Test set: Average loss: 0.0050, Accuracy: 9513/10000 (95%)

95.13
[3 ,  300] loss: 0.174
[3 ,  600] loss: 0.172
[3 ,  900] loss: 0.162
[3 , 1200] loss: 0.159
[3 , 1500] loss: 0.157
[3 , 1800] loss: 0.150

Test set: Average loss: 0.0040, Accuracy: 9618/10000 (96%)

96.18
[4 ,  300] loss: 0.134
[4 ,  600] loss: 0.133
[4 ,  900] loss: 0.134
[4 , 1200] loss: 0.133
[4 , 1500] loss: 0.119
[4 , 1800] loss: 0.130

Test set: Average loss: 0.0031, Accuracy: 9689/10000 (97%)

96.89
[5 ,  300] loss: 0.114
[5 ,  600] loss: 0.104
[5 ,  900] loss: 0.116
[5 , 1200] loss

[32m[I 2021-04-28 15:01:33,076][0m Trial 74 finished with value: 97.64 and parameters: {'lr': 0.001380379687866133, 'l1': 331, 'l2': 240}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0023, Accuracy: 9764/10000 (98%)

97.64
435 167 0.008386078980282726
[1 ,  300] loss: 0.788
[1 ,  600] loss: 0.413
[1 ,  900] loss: 0.345
[1 , 1200] loss: 0.291
[1 , 1500] loss: 0.286
[1 , 1800] loss: 0.291

Test set: Average loss: 0.0062, Accuracy: 9398/10000 (94%)

93.98
[2 ,  300] loss: 0.235
[2 ,  600] loss: 0.229
[2 ,  900] loss: 0.216
[2 , 1200] loss: 0.212
[2 , 1500] loss: 0.191
[2 , 1800] loss: 0.190

Test set: Average loss: 0.0051, Accuracy: 9484/10000 (95%)

94.84
[3 ,  300] loss: 0.169
[3 ,  600] loss: 0.182
[3 ,  900] loss: 0.162
[3 , 1200] loss: 0.174
[3 , 1500] loss: 0.167
[3 , 1800] loss: 0.169

Test set: Average loss: 0.0036, Accuracy: 9653/10000 (97%)

96.53
[4 ,  300] loss: 0.154
[4 ,  600] loss: 0.156
[4 ,  900] loss: 0.138
[4 , 1200] loss: 0.140
[4 , 1500] loss: 0.135
[4 , 1800] loss: 0.132

Test set: Average loss: 0.0036, Accuracy: 9647/10000 (96%)

96.47
[5 ,  300] loss: 0.135
[5 ,  600] loss: 0.127
[5 ,  900] loss: 0.130
[5 , 1200] loss

[32m[I 2021-04-28 15:03:41,771][0m Trial 75 finished with value: 97.76 and parameters: {'lr': 0.008386078980282726, 'l1': 435, 'l2': 167}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0026, Accuracy: 9776/10000 (98%)

97.76
395 313 0.005721849576177994
[1 ,  300] loss: 0.814
[1 ,  600] loss: 0.384
[1 ,  900] loss: 0.306
[1 , 1200] loss: 0.254
[1 , 1500] loss: 0.234
[1 , 1800] loss: 0.230

Test set: Average loss: 0.0062, Accuracy: 9330/10000 (93%)

93.3
[2 ,  300] loss: 0.226
[2 ,  600] loss: 0.176
[2 ,  900] loss: 0.191
[2 , 1200] loss: 0.177
[2 , 1500] loss: 0.175
[2 , 1800] loss: 0.170

Test set: Average loss: 0.0039, Accuracy: 9617/10000 (96%)

96.17
[3 ,  300] loss: 0.134
[3 ,  600] loss: 0.144
[3 ,  900] loss: 0.131
[3 , 1200] loss: 0.134
[3 , 1500] loss: 0.135
[3 , 1800] loss: 0.150

Test set: Average loss: 0.0035, Accuracy: 9644/10000 (96%)

96.44
[4 ,  300] loss: 0.117
[4 ,  600] loss: 0.116
[4 ,  900] loss: 0.119
[4 , 1200] loss: 0.115
[4 , 1500] loss: 0.106
[4 , 1800] loss: 0.117

Test set: Average loss: 0.0033, Accuracy: 9684/10000 (97%)

96.84
[5 ,  300] loss: 0.102
[5 ,  600] loss: 0.098
[5 ,  900] loss: 0.098
[5 , 1200] loss:

[32m[I 2021-04-28 15:05:55,197][0m Trial 76 finished with value: 97.86 and parameters: {'lr': 0.005721849576177994, 'l1': 395, 'l2': 313}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0024, Accuracy: 9786/10000 (98%)

97.86
417 341 0.00295419927494461
[1 ,  300] loss: 0.960
[1 ,  600] loss: 0.389
[1 ,  900] loss: 0.321
[1 , 1200] loss: 0.280
[1 , 1500] loss: 0.266
[1 , 1800] loss: 0.234

Test set: Average loss: 0.0052, Accuracy: 9512/10000 (95%)

95.12
[2 ,  300] loss: 0.199
[2 ,  600] loss: 0.191
[2 ,  900] loss: 0.184
[2 , 1200] loss: 0.168
[2 , 1500] loss: 0.162
[2 , 1800] loss: 0.165

Test set: Average loss: 0.0038, Accuracy: 9606/10000 (96%)

96.06
[3 ,  300] loss: 0.132
[3 ,  600] loss: 0.138
[3 ,  900] loss: 0.121
[3 , 1200] loss: 0.129
[3 , 1500] loss: 0.133
[3 , 1800] loss: 0.124

Test set: Average loss: 0.0032, Accuracy: 9671/10000 (97%)

96.71
[4 ,  300] loss: 0.107
[4 ,  600] loss: 0.112
[4 ,  900] loss: 0.102
[4 , 1200] loss: 0.110
[4 , 1500] loss: 0.100
[4 , 1800] loss: 0.107

Test set: Average loss: 0.0037, Accuracy: 9648/10000 (96%)

96.48
[5 ,  300] loss: 0.091
[5 ,  600] loss: 0.090
[5 ,  900] loss: 0.089
[5 , 1200] loss:

[32m[I 2021-04-28 15:08:05,379][0m Trial 77 finished with value: 98.02 and parameters: {'lr': 0.00295419927494461, 'l1': 417, 'l2': 341}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0021, Accuracy: 9802/10000 (98%)

98.02
498 351 0.011849262431781691
[1 ,  300] loss: 0.796
[1 ,  600] loss: 0.425
[1 ,  900] loss: 0.386
[1 , 1200] loss: 0.329
[1 , 1500] loss: 0.349
[1 , 1800] loss: 0.306

Test set: Average loss: 0.0074, Accuracy: 9247/10000 (92%)

92.47
[2 ,  300] loss: 0.265
[2 ,  600] loss: 0.272
[2 ,  900] loss: 0.283
[2 , 1200] loss: 0.284
[2 , 1500] loss: 0.266
[2 , 1800] loss: 0.239

Test set: Average loss: 0.0056, Accuracy: 9441/10000 (94%)

94.41
[3 ,  300] loss: 0.243
[3 ,  600] loss: 0.223
[3 ,  900] loss: 0.219
[3 , 1200] loss: 0.223
[3 , 1500] loss: 0.214
[3 , 1800] loss: 0.182

Test set: Average loss: 0.0054, Accuracy: 9475/10000 (95%)

94.75
[4 ,  300] loss: 0.186
[4 ,  600] loss: 0.188
[4 ,  900] loss: 0.188
[4 , 1200] loss: 0.187
[4 , 1500] loss: 0.207
[4 , 1800] loss: 0.179

Test set: Average loss: 0.0037, Accuracy: 9632/10000 (96%)

96.32
[5 ,  300] loss: 0.170
[5 ,  600] loss: 0.180
[5 ,  900] loss: 0.154
[5 , 1200] loss

[32m[I 2021-04-28 15:10:16,702][0m Trial 78 finished with value: 97.12 and parameters: {'lr': 0.011849262431781691, 'l1': 498, 'l2': 351}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0033, Accuracy: 9712/10000 (97%)

97.12
420 334 0.0031650429948679145
[1 ,  300] loss: 0.941
[1 ,  600] loss: 0.407
[1 ,  900] loss: 0.343
[1 , 1200] loss: 0.259
[1 , 1500] loss: 0.235
[1 , 1800] loss: 0.223

Test set: Average loss: 0.0057, Accuracy: 9438/10000 (94%)

94.38
[2 ,  300] loss: 0.180
[2 ,  600] loss: 0.176
[2 ,  900] loss: 0.178
[2 , 1200] loss: 0.176
[2 , 1500] loss: 0.167
[2 , 1800] loss: 0.156

Test set: Average loss: 0.0045, Accuracy: 9539/10000 (95%)

95.39
[3 ,  300] loss: 0.138
[3 ,  600] loss: 0.136
[3 ,  900] loss: 0.125
[3 , 1200] loss: 0.128
[3 , 1500] loss: 0.123
[3 , 1800] loss: 0.123

Test set: Average loss: 0.0035, Accuracy: 9656/10000 (97%)

96.56
[4 ,  300] loss: 0.117
[4 ,  600] loss: 0.096
[4 ,  900] loss: 0.113
[4 , 1200] loss: 0.104
[4 , 1500] loss: 0.101
[4 , 1800] loss: 0.108

Test set: Average loss: 0.0029, Accuracy: 9682/10000 (97%)

96.82
[5 ,  300] loss: 0.088
[5 ,  600] loss: 0.092
[5 ,  900] loss: 0.091
[5 , 1200] los

[32m[I 2021-04-28 15:12:27,008][0m Trial 79 finished with value: 97.89 and parameters: {'lr': 0.0031650429948679145, 'l1': 420, 'l2': 334}. Best is trial 14 with value: 98.04.[0m



Test set: Average loss: 0.0021, Accuracy: 9789/10000 (98%)

97.89
449 385 0.0036822177281177247
[1 ,  300] loss: 0.876
[1 ,  600] loss: 0.384
[1 ,  900] loss: 0.286
[1 , 1200] loss: 0.275
[1 , 1500] loss: 0.250
[1 , 1800] loss: 0.221

Test set: Average loss: 0.0056, Accuracy: 9461/10000 (95%)

94.61
[2 ,  300] loss: 0.187
[2 ,  600] loss: 0.181
[2 ,  900] loss: 0.167
[2 , 1200] loss: 0.150
[2 , 1500] loss: 0.169
[2 , 1800] loss: 0.148

Test set: Average loss: 0.0041, Accuracy: 9600/10000 (96%)

96.0
[3 ,  300] loss: 0.139
[3 ,  600] loss: 0.142
[3 ,  900] loss: 0.127
[3 , 1200] loss: 0.121
[3 , 1500] loss: 0.119
[3 , 1800] loss: 0.117

Test set: Average loss: 0.0031, Accuracy: 9692/10000 (97%)

96.92
[4 ,  300] loss: 0.104
[4 ,  600] loss: 0.107
[4 ,  900] loss: 0.099
[4 , 1200] loss: 0.110
[4 , 1500] loss: 0.088
[4 , 1800] loss: 0.111

Test set: Average loss: 0.0028, Accuracy: 9718/10000 (97%)

97.18
[5 ,  300] loss: 0.086
[5 ,  600] loss: 0.088
[5 ,  900] loss: 0.088
[5 , 1200] loss

[32m[I 2021-04-28 15:14:36,952][0m Trial 80 finished with value: 98.14 and parameters: {'lr': 0.0036822177281177247, 'l1': 449, 'l2': 385}. Best is trial 80 with value: 98.14.[0m



Test set: Average loss: 0.0018, Accuracy: 9814/10000 (98%)

98.14
467 388 0.004127684479071705
[1 ,  300] loss: 0.856
[1 ,  600] loss: 0.370
[1 ,  900] loss: 0.293
[1 , 1200] loss: 0.269
[1 , 1500] loss: 0.244
[1 , 1800] loss: 0.211

Test set: Average loss: 0.0050, Accuracy: 9526/10000 (95%)

95.26
[2 ,  300] loss: 0.167
[2 ,  600] loss: 0.175
[2 ,  900] loss: 0.170
[2 , 1200] loss: 0.161
[2 , 1500] loss: 0.156
[2 , 1800] loss: 0.152

Test set: Average loss: 0.0034, Accuracy: 9671/10000 (97%)

96.71
[3 ,  300] loss: 0.137
[3 ,  600] loss: 0.137
[3 ,  900] loss: 0.118
[3 , 1200] loss: 0.129
[3 , 1500] loss: 0.124
[3 , 1800] loss: 0.118

Test set: Average loss: 0.0033, Accuracy: 9673/10000 (97%)

96.73
[4 ,  300] loss: 0.099
[4 ,  600] loss: 0.109
[4 ,  900] loss: 0.102
[4 , 1200] loss: 0.104
[4 , 1500] loss: 0.100
[4 , 1800] loss: 0.105

Test set: Average loss: 0.0029, Accuracy: 9717/10000 (97%)

97.17
[5 ,  300] loss: 0.076
[5 ,  600] loss: 0.098
[5 ,  900] loss: 0.106
[5 , 1200] loss

[32m[I 2021-04-28 15:16:48,782][0m Trial 81 finished with value: 97.89 and parameters: {'lr': 0.004127684479071705, 'l1': 467, 'l2': 388}. Best is trial 80 with value: 98.14.[0m



Test set: Average loss: 0.0022, Accuracy: 9789/10000 (98%)

97.89
451 450 0.005045297126172444
[1 ,  300] loss: 0.838
[1 ,  600] loss: 0.359
[1 ,  900] loss: 0.310
[1 , 1200] loss: 0.264
[1 , 1500] loss: 0.236
[1 , 1800] loss: 0.224

Test set: Average loss: 0.0043, Accuracy: 9568/10000 (96%)

95.68
[2 ,  300] loss: 0.184
[2 ,  600] loss: 0.184
[2 ,  900] loss: 0.165
[2 , 1200] loss: 0.166
[2 , 1500] loss: 0.170
[2 , 1800] loss: 0.167

Test set: Average loss: 0.0041, Accuracy: 9568/10000 (96%)

95.68
[3 ,  300] loss: 0.138
[3 ,  600] loss: 0.132
[3 ,  900] loss: 0.140
[3 , 1200] loss: 0.135
[3 , 1500] loss: 0.124
[3 , 1800] loss: 0.130

Test set: Average loss: 0.0034, Accuracy: 9640/10000 (96%)

96.4
[4 ,  300] loss: 0.107
[4 ,  600] loss: 0.110
[4 ,  900] loss: 0.118
[4 , 1200] loss: 0.091
[4 , 1500] loss: 0.117
[4 , 1800] loss: 0.107

Test set: Average loss: 0.0028, Accuracy: 9722/10000 (97%)

97.22
[5 ,  300] loss: 0.092
[5 ,  600] loss: 0.092
[5 ,  900] loss: 0.095
[5 , 1200] loss:

[32m[I 2021-04-28 15:18:57,430][0m Trial 82 finished with value: 97.82 and parameters: {'lr': 0.005045297126172444, 'l1': 451, 'l2': 450}. Best is trial 80 with value: 98.14.[0m



Test set: Average loss: 0.0021, Accuracy: 9782/10000 (98%)

97.82
367 333 0.00290310123228489
[1 ,  300] loss: 0.981
[1 ,  600] loss: 0.399
[1 ,  900] loss: 0.328
[1 , 1200] loss: 0.279
[1 , 1500] loss: 0.260
[1 , 1800] loss: 0.239

Test set: Average loss: 0.0051, Accuracy: 9517/10000 (95%)

95.17
[2 ,  300] loss: 0.195
[2 ,  600] loss: 0.175
[2 ,  900] loss: 0.182
[2 , 1200] loss: 0.165
[2 , 1500] loss: 0.171
[2 , 1800] loss: 0.161

Test set: Average loss: 0.0038, Accuracy: 9623/10000 (96%)

96.23
[3 ,  300] loss: 0.142
[3 ,  600] loss: 0.133
[3 ,  900] loss: 0.140
[3 , 1200] loss: 0.137
[3 , 1500] loss: 0.128
[3 , 1800] loss: 0.123

Test set: Average loss: 0.0031, Accuracy: 9696/10000 (97%)

96.96
[4 ,  300] loss: 0.100
[4 ,  600] loss: 0.105
[4 ,  900] loss: 0.123
[4 , 1200] loss: 0.113
[4 , 1500] loss: 0.106
[4 , 1800] loss: 0.104

Test set: Average loss: 0.0029, Accuracy: 9697/10000 (97%)

96.97
[5 ,  300] loss: 0.099
[5 ,  600] loss: 0.098
[5 ,  900] loss: 0.091
[5 , 1200] loss:

[32m[I 2021-04-28 15:21:08,752][0m Trial 83 finished with value: 98.18 and parameters: {'lr': 0.00290310123228489, 'l1': 367, 'l2': 333}. Best is trial 83 with value: 98.18.[0m



Test set: Average loss: 0.0019, Accuracy: 9818/10000 (98%)

98.18
367 423 0.00276863841476844
[1 ,  300] loss: 0.958
[1 ,  600] loss: 0.391
[1 ,  900] loss: 0.346
[1 , 1200] loss: 0.269
[1 , 1500] loss: 0.255
[1 , 1800] loss: 0.232

Test set: Average loss: 0.0051, Accuracy: 9482/10000 (95%)

94.82
[2 ,  300] loss: 0.188
[2 ,  600] loss: 0.190
[2 ,  900] loss: 0.185
[2 , 1200] loss: 0.160
[2 , 1500] loss: 0.155
[2 , 1800] loss: 0.153

Test set: Average loss: 0.0039, Accuracy: 9595/10000 (96%)

95.95
[3 ,  300] loss: 0.144
[3 ,  600] loss: 0.139
[3 ,  900] loss: 0.134
[3 , 1200] loss: 0.131
[3 , 1500] loss: 0.121
[3 , 1800] loss: 0.114

Test set: Average loss: 0.0033, Accuracy: 9658/10000 (97%)

96.58
[4 ,  300] loss: 0.106
[4 ,  600] loss: 0.117
[4 ,  900] loss: 0.098
[4 , 1200] loss: 0.114
[4 , 1500] loss: 0.108
[4 , 1800] loss: 0.097

Test set: Average loss: 0.0028, Accuracy: 9716/10000 (97%)

97.16
[5 ,  300] loss: 0.085
[5 ,  600] loss: 0.105
[5 ,  900] loss: 0.090
[5 , 1200] loss:

[32m[I 2021-04-28 15:23:17,562][0m Trial 84 finished with value: 97.87 and parameters: {'lr': 0.00276863841476844, 'l1': 367, 'l2': 423}. Best is trial 83 with value: 98.18.[0m



Test set: Average loss: 0.0020, Accuracy: 9787/10000 (98%)

97.87
412 369 0.0018379869902441816
[1 ,  300] loss: 1.135
[1 ,  600] loss: 0.424
[1 ,  900] loss: 0.354
[1 , 1200] loss: 0.321
[1 , 1500] loss: 0.269
[1 , 1800] loss: 0.250

Test set: Average loss: 0.0063, Accuracy: 9375/10000 (94%)

93.75
[2 ,  300] loss: 0.218
[2 ,  600] loss: 0.212
[2 ,  900] loss: 0.196
[2 , 1200] loss: 0.179
[2 , 1500] loss: 0.176
[2 , 1800] loss: 0.172

Test set: Average loss: 0.0043, Accuracy: 9585/10000 (96%)

95.85
[3 ,  300] loss: 0.154
[3 ,  600] loss: 0.141
[3 ,  900] loss: 0.141
[3 , 1200] loss: 0.141
[3 , 1500] loss: 0.141
[3 , 1800] loss: 0.126

Test set: Average loss: 0.0035, Accuracy: 9655/10000 (97%)

96.55
[4 ,  300] loss: 0.122
[4 ,  600] loss: 0.114
[4 ,  900] loss: 0.115
[4 , 1200] loss: 0.107
[4 , 1500] loss: 0.112
[4 , 1800] loss: 0.114

Test set: Average loss: 0.0036, Accuracy: 9653/10000 (97%)

96.53
[5 ,  300] loss: 0.096
[5 ,  600] loss: 0.096
[5 ,  900] loss: 0.099
[5 , 1200] los

[32m[I 2021-04-28 15:25:26,801][0m Trial 85 finished with value: 97.95 and parameters: {'lr': 0.0018379869902441816, 'l1': 412, 'l2': 369}. Best is trial 83 with value: 98.18.[0m



Test set: Average loss: 0.0021, Accuracy: 9795/10000 (98%)

97.95
313 410 0.001789996729780151
[1 ,  300] loss: 1.163
[1 ,  600] loss: 0.437
[1 ,  900] loss: 0.373
[1 , 1200] loss: 0.322
[1 , 1500] loss: 0.281
[1 , 1800] loss: 0.255

Test set: Average loss: 0.0066, Accuracy: 9342/10000 (93%)

93.42
[2 ,  300] loss: 0.231
[2 ,  600] loss: 0.212
[2 ,  900] loss: 0.191
[2 , 1200] loss: 0.198
[2 , 1500] loss: 0.183
[2 , 1800] loss: 0.177

Test set: Average loss: 0.0043, Accuracy: 9579/10000 (96%)

95.79
[3 ,  300] loss: 0.164
[3 ,  600] loss: 0.154
[3 ,  900] loss: 0.144
[3 , 1200] loss: 0.140
[3 , 1500] loss: 0.137
[3 , 1800] loss: 0.134

Test set: Average loss: 0.0036, Accuracy: 9631/10000 (96%)

96.31
[4 ,  300] loss: 0.118
[4 ,  600] loss: 0.119
[4 ,  900] loss: 0.119
[4 , 1200] loss: 0.122
[4 , 1500] loss: 0.112
[4 , 1800] loss: 0.121

Test set: Average loss: 0.0028, Accuracy: 9711/10000 (97%)

97.11
[5 ,  300] loss: 0.105
[5 ,  600] loss: 0.097
[5 ,  900] loss: 0.100
[5 , 1200] loss

[32m[I 2021-04-28 15:27:35,146][0m Trial 86 finished with value: 97.73 and parameters: {'lr': 0.001789996729780151, 'l1': 313, 'l2': 410}. Best is trial 83 with value: 98.18.[0m



Test set: Average loss: 0.0022, Accuracy: 9773/10000 (98%)

97.73
387 380 0.0009018188139615189
[1 ,  300] loss: 1.545
[1 ,  600] loss: 0.549
[1 ,  900] loss: 0.409
[1 , 1200] loss: 0.395
[1 , 1500] loss: 0.319
[1 , 1800] loss: 0.330

Test set: Average loss: 0.0083, Accuracy: 9248/10000 (92%)

92.48
[2 ,  300] loss: 0.291
[2 ,  600] loss: 0.280
[2 ,  900] loss: 0.251
[2 , 1200] loss: 0.249
[2 , 1500] loss: 0.222
[2 , 1800] loss: 0.227

Test set: Average loss: 0.0058, Accuracy: 9456/10000 (95%)

94.56
[3 ,  300] loss: 0.205
[3 ,  600] loss: 0.199
[3 ,  900] loss: 0.185
[3 , 1200] loss: 0.171
[3 , 1500] loss: 0.173
[3 , 1800] loss: 0.168

Test set: Average loss: 0.0046, Accuracy: 9569/10000 (96%)

95.69
[4 ,  300] loss: 0.152
[4 ,  600] loss: 0.149
[4 ,  900] loss: 0.157
[4 , 1200] loss: 0.138
[4 , 1500] loss: 0.140
[4 , 1800] loss: 0.142

Test set: Average loss: 0.0037, Accuracy: 9655/10000 (97%)

96.55
[5 ,  300] loss: 0.125
[5 ,  600] loss: 0.121
[5 ,  900] loss: 0.139
[5 , 1200] los

[32m[I 2021-04-28 15:29:43,885][0m Trial 87 finished with value: 97.54 and parameters: {'lr': 0.0009018188139615189, 'l1': 387, 'l2': 380}. Best is trial 83 with value: 98.18.[0m



Test set: Average loss: 0.0023, Accuracy: 9754/10000 (98%)

97.54
374 328 0.003643445509421468
[1 ,  300] loss: 0.907
[1 ,  600] loss: 0.390
[1 ,  900] loss: 0.312
[1 , 1200] loss: 0.290
[1 , 1500] loss: 0.252
[1 , 1800] loss: 0.220

Test set: Average loss: 0.0049, Accuracy: 9530/10000 (95%)

95.3
[2 ,  300] loss: 0.187
[2 ,  600] loss: 0.189
[2 ,  900] loss: 0.164
[2 , 1200] loss: 0.171
[2 , 1500] loss: 0.166
[2 , 1800] loss: 0.170

Test set: Average loss: 0.0041, Accuracy: 9598/10000 (96%)

95.98
[3 ,  300] loss: 0.137
[3 ,  600] loss: 0.136
[3 ,  900] loss: 0.142
[3 , 1200] loss: 0.132
[3 , 1500] loss: 0.128
[3 , 1800] loss: 0.119

Test set: Average loss: 0.0036, Accuracy: 9643/10000 (96%)

96.43
[4 ,  300] loss: 0.115
[4 ,  600] loss: 0.122
[4 ,  900] loss: 0.107
[4 , 1200] loss: 0.115
[4 , 1500] loss: 0.103
[4 , 1800] loss: 0.103

Test set: Average loss: 0.0031, Accuracy: 9673/10000 (97%)

96.73
[5 ,  300] loss: 0.087
[5 ,  600] loss: 0.092
[5 ,  900] loss: 0.101
[5 , 1200] loss:

[32m[I 2021-04-28 15:31:52,079][0m Trial 88 finished with value: 97.92 and parameters: {'lr': 0.003643445509421468, 'l1': 374, 'l2': 328}. Best is trial 83 with value: 98.18.[0m



Test set: Average loss: 0.0022, Accuracy: 9792/10000 (98%)

97.92
444 394 0.0015678636712549213
[1 ,  300] loss: 1.170
[1 ,  600] loss: 0.446
[1 ,  900] loss: 0.363
[1 , 1200] loss: 0.318
[1 , 1500] loss: 0.283
[1 , 1800] loss: 0.248

Test set: Average loss: 0.0065, Accuracy: 9378/10000 (94%)

93.78
[2 ,  300] loss: 0.235
[2 ,  600] loss: 0.217
[2 ,  900] loss: 0.182
[2 , 1200] loss: 0.187
[2 , 1500] loss: 0.180
[2 , 1800] loss: 0.191

Test set: Average loss: 0.0048, Accuracy: 9529/10000 (95%)

95.29
[3 ,  300] loss: 0.160
[3 ,  600] loss: 0.151
[3 ,  900] loss: 0.143
[3 , 1200] loss: 0.142
[3 , 1500] loss: 0.136
[3 , 1800] loss: 0.136

Test set: Average loss: 0.0034, Accuracy: 9656/10000 (97%)

96.56
[4 ,  300] loss: 0.110
[4 ,  600] loss: 0.112
[4 ,  900] loss: 0.112
[4 , 1200] loss: 0.123
[4 , 1500] loss: 0.121
[4 , 1800] loss: 0.116

Test set: Average loss: 0.0033, Accuracy: 9659/10000 (97%)

96.59
[5 ,  300] loss: 0.102
[5 ,  600] loss: 0.089
[5 ,  900] loss: 0.094
[5 , 1200] los

[32m[I 2021-04-28 15:33:59,298][0m Trial 89 finished with value: 97.96 and parameters: {'lr': 0.0015678636712549213, 'l1': 444, 'l2': 394}. Best is trial 83 with value: 98.18.[0m



Test set: Average loss: 0.0021, Accuracy: 9796/10000 (98%)

97.96
260 439 0.001158915292387706
[1 ,  300] loss: 1.397
[1 ,  600] loss: 0.509
[1 ,  900] loss: 0.386
[1 , 1200] loss: 0.366
[1 , 1500] loss: 0.328
[1 , 1800] loss: 0.305

Test set: Average loss: 0.0075, Accuracy: 9279/10000 (93%)

92.79
[2 ,  300] loss: 0.281
[2 ,  600] loss: 0.250
[2 ,  900] loss: 0.237
[2 , 1200] loss: 0.211
[2 , 1500] loss: 0.215
[2 , 1800] loss: 0.196

Test set: Average loss: 0.0051, Accuracy: 9497/10000 (95%)

94.97
[3 ,  300] loss: 0.180
[3 ,  600] loss: 0.176
[3 ,  900] loss: 0.181
[3 , 1200] loss: 0.166
[3 , 1500] loss: 0.154
[3 , 1800] loss: 0.161

Test set: Average loss: 0.0044, Accuracy: 9574/10000 (96%)

95.74
[4 ,  300] loss: 0.154
[4 ,  600] loss: 0.134
[4 ,  900] loss: 0.144
[4 , 1200] loss: 0.134
[4 , 1500] loss: 0.146
[4 , 1800] loss: 0.136

Test set: Average loss: 0.0035, Accuracy: 9669/10000 (97%)

96.69
[5 ,  300] loss: 0.130
[5 ,  600] loss: 0.123
[5 ,  900] loss: 0.119
[5 , 1200] loss

[32m[I 2021-04-28 15:36:06,613][0m Trial 90 finished with value: 97.78 and parameters: {'lr': 0.001158915292387706, 'l1': 260, 'l2': 439}. Best is trial 83 with value: 98.18.[0m



Test set: Average loss: 0.0024, Accuracy: 9778/10000 (98%)

97.78
357 361 0.0022351407389333377
[1 ,  300] loss: 1.056
[1 ,  600] loss: 0.413
[1 ,  900] loss: 0.331
[1 , 1200] loss: 0.306
[1 , 1500] loss: 0.265
[1 , 1800] loss: 0.249

Test set: Average loss: 0.0056, Accuracy: 9449/10000 (94%)

94.49
[2 ,  300] loss: 0.213
[2 ,  600] loss: 0.197
[2 ,  900] loss: 0.177
[2 , 1200] loss: 0.186
[2 , 1500] loss: 0.169
[2 , 1800] loss: 0.161

Test set: Average loss: 0.0044, Accuracy: 9555/10000 (96%)

95.55
[3 ,  300] loss: 0.145
[3 ,  600] loss: 0.141
[3 ,  900] loss: 0.154
[3 , 1200] loss: 0.133
[3 , 1500] loss: 0.128
[3 , 1800] loss: 0.126

Test set: Average loss: 0.0032, Accuracy: 9682/10000 (97%)

96.82
[4 ,  300] loss: 0.110
[4 ,  600] loss: 0.111
[4 ,  900] loss: 0.102
[4 , 1200] loss: 0.117
[4 , 1500] loss: 0.117
[4 , 1800] loss: 0.121

Test set: Average loss: 0.0032, Accuracy: 9680/10000 (97%)

96.8
[5 ,  300] loss: 0.098
[5 ,  600] loss: 0.093
[5 ,  900] loss: 0.099
[5 , 1200] loss

[32m[I 2021-04-28 15:38:13,322][0m Trial 91 finished with value: 97.78 and parameters: {'lr': 0.0022351407389333377, 'l1': 357, 'l2': 361}. Best is trial 83 with value: 98.18.[0m



Test set: Average loss: 0.0023, Accuracy: 9778/10000 (98%)

97.78
435 338 0.0015445466999561464
[1 ,  300] loss: 1.213
[1 ,  600] loss: 0.464
[1 ,  900] loss: 0.389
[1 , 1200] loss: 0.324
[1 , 1500] loss: 0.289
[1 , 1800] loss: 0.261

Test set: Average loss: 0.0068, Accuracy: 9332/10000 (93%)

93.32
[2 ,  300] loss: 0.227
[2 ,  600] loss: 0.224
[2 ,  900] loss: 0.200
[2 , 1200] loss: 0.199
[2 , 1500] loss: 0.186
[2 , 1800] loss: 0.181

Test set: Average loss: 0.0044, Accuracy: 9578/10000 (96%)

95.78
[3 ,  300] loss: 0.155
[3 ,  600] loss: 0.146
[3 ,  900] loss: 0.148
[3 , 1200] loss: 0.157
[3 , 1500] loss: 0.143
[3 , 1800] loss: 0.138

Test set: Average loss: 0.0034, Accuracy: 9656/10000 (97%)

96.56
[4 ,  300] loss: 0.122
[4 ,  600] loss: 0.113
[4 ,  900] loss: 0.115
[4 , 1200] loss: 0.116
[4 , 1500] loss: 0.122
[4 , 1800] loss: 0.112

Test set: Average loss: 0.0031, Accuracy: 9696/10000 (97%)

96.96
[5 ,  300] loss: 0.103
[5 ,  600] loss: 0.096
[5 ,  900] loss: 0.098
[5 , 1200] los

[32m[I 2021-04-28 15:40:22,107][0m Trial 92 finished with value: 97.58 and parameters: {'lr': 0.0015445466999561464, 'l1': 435, 'l2': 338}. Best is trial 83 with value: 98.18.[0m



Test set: Average loss: 0.0023, Accuracy: 9758/10000 (98%)

97.58
404 397 0.003007703579572047
[1 ,  300] loss: 0.954
[1 ,  600] loss: 0.402
[1 ,  900] loss: 0.328
[1 , 1200] loss: 0.268
[1 , 1500] loss: 0.243
[1 , 1800] loss: 0.223

Test set: Average loss: 0.0056, Accuracy: 9458/10000 (95%)

94.58
[2 ,  300] loss: 0.199
[2 ,  600] loss: 0.191
[2 ,  900] loss: 0.163
[2 , 1200] loss: 0.168
[2 , 1500] loss: 0.162
[2 , 1800] loss: 0.166

Test set: Average loss: 0.0037, Accuracy: 9624/10000 (96%)

96.24
[3 ,  300] loss: 0.147
[3 ,  600] loss: 0.142
[3 ,  900] loss: 0.127
[3 , 1200] loss: 0.129
[3 , 1500] loss: 0.125
[3 , 1800] loss: 0.125

Test set: Average loss: 0.0037, Accuracy: 9606/10000 (96%)

96.06
[4 ,  300] loss: 0.111
[4 ,  600] loss: 0.098
[4 ,  900] loss: 0.105
[4 , 1200] loss: 0.108
[4 , 1500] loss: 0.105
[4 , 1800] loss: 0.106

Test set: Average loss: 0.0028, Accuracy: 9723/10000 (97%)

97.23
[5 ,  300] loss: 0.093
[5 ,  600] loss: 0.085
[5 ,  900] loss: 0.098
[5 , 1200] loss

[32m[I 2021-04-28 15:42:29,605][0m Trial 93 finished with value: 97.61 and parameters: {'lr': 0.003007703579572047, 'l1': 404, 'l2': 397}. Best is trial 83 with value: 98.18.[0m



Test set: Average loss: 0.0024, Accuracy: 9761/10000 (98%)

97.61
445 462 0.0021395087477185425
[1 ,  300] loss: 1.038
[1 ,  600] loss: 0.412
[1 ,  900] loss: 0.361
[1 , 1200] loss: 0.302
[1 , 1500] loss: 0.261
[1 , 1800] loss: 0.242

Test set: Average loss: 0.0056, Accuracy: 9469/10000 (95%)

94.69
[2 ,  300] loss: 0.218
[2 ,  600] loss: 0.188
[2 ,  900] loss: 0.178
[2 , 1200] loss: 0.179
[2 , 1500] loss: 0.164
[2 , 1800] loss: 0.155

Test set: Average loss: 0.0045, Accuracy: 9537/10000 (95%)

95.37
[3 ,  300] loss: 0.137
[3 ,  600] loss: 0.149
[3 ,  900] loss: 0.141
[3 , 1200] loss: 0.125
[3 , 1500] loss: 0.133
[3 , 1800] loss: 0.117

Test set: Average loss: 0.0033, Accuracy: 9677/10000 (97%)

96.77
[4 ,  300] loss: 0.109
[4 ,  600] loss: 0.115
[4 ,  900] loss: 0.110
[4 , 1200] loss: 0.112
[4 , 1500] loss: 0.112
[4 , 1800] loss: 0.101

Test set: Average loss: 0.0029, Accuracy: 9713/10000 (97%)

97.13
[5 ,  300] loss: 0.093
[5 ,  600] loss: 0.097
[5 ,  900] loss: 0.087
[5 , 1200] los

[32m[I 2021-04-28 15:44:34,860][0m Trial 94 finished with value: 97.87 and parameters: {'lr': 0.0021395087477185425, 'l1': 445, 'l2': 462}. Best is trial 83 with value: 98.18.[0m



Test set: Average loss: 0.0021, Accuracy: 9787/10000 (98%)

97.87
425 343 0.00386537066070888
[1 ,  300] loss: 0.891
[1 ,  600] loss: 0.367
[1 ,  900] loss: 0.322
[1 , 1200] loss: 0.271
[1 , 1500] loss: 0.270
[1 , 1800] loss: 0.218

Test set: Average loss: 0.0060, Accuracy: 9395/10000 (94%)

93.95
[2 ,  300] loss: 0.188
[2 ,  600] loss: 0.177
[2 ,  900] loss: 0.176
[2 , 1200] loss: 0.166
[2 , 1500] loss: 0.172
[2 , 1800] loss: 0.151

Test set: Average loss: 0.0039, Accuracy: 9613/10000 (96%)

96.13
[3 ,  300] loss: 0.138
[3 ,  600] loss: 0.131
[3 ,  900] loss: 0.135
[3 , 1200] loss: 0.123
[3 , 1500] loss: 0.128
[3 , 1800] loss: 0.122

Test set: Average loss: 0.0032, Accuracy: 9689/10000 (97%)

96.89
[4 ,  300] loss: 0.103
[4 ,  600] loss: 0.109
[4 ,  900] loss: 0.108
[4 , 1200] loss: 0.109
[4 , 1500] loss: 0.108
[4 , 1800] loss: 0.104

Test set: Average loss: 0.0029, Accuracy: 9685/10000 (97%)

96.85
[5 ,  300] loss: 0.084
[5 ,  600] loss: 0.099
[5 ,  900] loss: 0.097
[5 , 1200] loss:

[32m[I 2021-04-28 15:46:42,416][0m Trial 95 finished with value: 98.06 and parameters: {'lr': 0.00386537066070888, 'l1': 425, 'l2': 343}. Best is trial 83 with value: 98.18.[0m



Test set: Average loss: 0.0019, Accuracy: 9806/10000 (98%)

98.06
473 344 0.001444293383094788
[1 ,  300] loss: 1.230
[1 ,  600] loss: 0.467
[1 ,  900] loss: 0.375
[1 , 1200] loss: 0.313
[1 , 1500] loss: 0.308
[1 , 1800] loss: 0.263

Test set: Average loss: 0.0072, Accuracy: 9287/10000 (93%)

92.87
[2 ,  300] loss: 0.241
[2 ,  600] loss: 0.221
[2 ,  900] loss: 0.207
[2 , 1200] loss: 0.197
[2 , 1500] loss: 0.178
[2 , 1800] loss: 0.172

Test set: Average loss: 0.0046, Accuracy: 9558/10000 (96%)

95.58
[3 ,  300] loss: 0.166
[3 ,  600] loss: 0.161
[3 ,  900] loss: 0.146
[3 , 1200] loss: 0.152
[3 , 1500] loss: 0.134
[3 , 1800] loss: 0.136

Test set: Average loss: 0.0036, Accuracy: 9630/10000 (96%)

96.3
[4 ,  300] loss: 0.131
[4 ,  600] loss: 0.120
[4 ,  900] loss: 0.123
[4 , 1200] loss: 0.112
[4 , 1500] loss: 0.115
[4 , 1800] loss: 0.109

Test set: Average loss: 0.0033, Accuracy: 9658/10000 (97%)

96.58
[5 ,  300] loss: 0.105
[5 ,  600] loss: 0.102
[5 ,  900] loss: 0.101
[5 , 1200] loss:

[32m[I 2021-04-28 15:48:49,390][0m Trial 96 finished with value: 97.77 and parameters: {'lr': 0.001444293383094788, 'l1': 473, 'l2': 344}. Best is trial 83 with value: 98.18.[0m



Test set: Average loss: 0.0022, Accuracy: 9777/10000 (98%)

97.77
32 418 0.0037405224366982267
[1 ,  300] loss: 1.064
[1 ,  600] loss: 0.587
[1 ,  900] loss: 0.503
[1 , 1200] loss: 0.464
[1 , 1500] loss: 0.449
[1 , 1800] loss: 0.420

Test set: Average loss: 0.0088, Accuracy: 9082/10000 (91%)

90.82
[2 ,  300] loss: 0.382
[2 ,  600] loss: 0.384
[2 ,  900] loss: 0.355
[2 , 1200] loss: 0.356
[2 , 1500] loss: 0.346
[2 , 1800] loss: 0.320

Test set: Average loss: 0.0061, Accuracy: 9390/10000 (94%)

93.9
[3 ,  300] loss: 0.315
[3 ,  600] loss: 0.317
[3 ,  900] loss: 0.323
[3 , 1200] loss: 0.309
[3 , 1500] loss: 0.308
[3 , 1800] loss: 0.312

Test set: Average loss: 0.0060, Accuracy: 9403/10000 (94%)

94.03
[4 ,  300] loss: 0.304
[4 ,  600] loss: 0.271
[4 ,  900] loss: 0.276
[4 , 1200] loss: 0.285
[4 , 1500] loss: 0.268
[4 , 1800] loss: 0.288

Test set: Average loss: 0.0052, Accuracy: 9478/10000 (95%)

94.78
[5 ,  300] loss: 0.267
[5 ,  600] loss: 0.252
[5 ,  900] loss: 0.265
[5 , 1200] loss:

[32m[I 2021-04-28 15:50:57,021][0m Trial 97 finished with value: 95.88 and parameters: {'lr': 0.0037405224366982267, 'l1': 32, 'l2': 418}. Best is trial 83 with value: 98.18.[0m



Test set: Average loss: 0.0045, Accuracy: 9588/10000 (96%)

95.88
340 392 0.002721189799036355
[1 ,  300] loss: 0.991
[1 ,  600] loss: 0.400
[1 ,  900] loss: 0.335
[1 , 1200] loss: 0.284
[1 , 1500] loss: 0.243
[1 , 1800] loss: 0.246

Test set: Average loss: 0.0054, Accuracy: 9473/10000 (95%)

94.73
[2 ,  300] loss: 0.189
[2 ,  600] loss: 0.190
[2 ,  900] loss: 0.192
[2 , 1200] loss: 0.163
[2 , 1500] loss: 0.164
[2 , 1800] loss: 0.164

Test set: Average loss: 0.0038, Accuracy: 9619/10000 (96%)

96.19
[3 ,  300] loss: 0.148
[3 ,  600] loss: 0.133
[3 ,  900] loss: 0.141
[3 , 1200] loss: 0.140
[3 , 1500] loss: 0.133
[3 , 1800] loss: 0.125

Test set: Average loss: 0.0031, Accuracy: 9690/10000 (97%)

96.9
[4 ,  300] loss: 0.112
[4 ,  600] loss: 0.122
[4 ,  900] loss: 0.121
[4 , 1200] loss: 0.107
[4 , 1500] loss: 0.104
[4 , 1800] loss: 0.101

Test set: Average loss: 0.0028, Accuracy: 9719/10000 (97%)

97.19
[5 ,  300] loss: 0.092
[5 ,  600] loss: 0.097
[5 ,  900] loss: 0.102
[5 , 1200] loss:

[32m[I 2021-04-28 15:53:04,719][0m Trial 98 finished with value: 97.69 and parameters: {'lr': 0.002721189799036355, 'l1': 340, 'l2': 392}. Best is trial 83 with value: 98.18.[0m



Test set: Average loss: 0.0023, Accuracy: 9769/10000 (98%)

97.69
493 326 0.006793997188593686
[1 ,  300] loss: 0.779
[1 ,  600] loss: 0.383
[1 ,  900] loss: 0.305
[1 , 1200] loss: 0.290
[1 , 1500] loss: 0.231
[1 , 1800] loss: 0.227

Test set: Average loss: 0.0062, Accuracy: 9345/10000 (93%)

93.45
[2 ,  300] loss: 0.193
[2 ,  600] loss: 0.183
[2 ,  900] loss: 0.187
[2 , 1200] loss: 0.171
[2 , 1500] loss: 0.182
[2 , 1800] loss: 0.172

Test set: Average loss: 0.0043, Accuracy: 9572/10000 (96%)

95.72
[3 ,  300] loss: 0.146
[3 ,  600] loss: 0.146
[3 ,  900] loss: 0.142
[3 , 1200] loss: 0.134
[3 , 1500] loss: 0.124
[3 , 1800] loss: 0.129

Test set: Average loss: 0.0034, Accuracy: 9643/10000 (96%)

96.43
[4 ,  300] loss: 0.107
[4 ,  600] loss: 0.122
[4 ,  900] loss: 0.118
[4 , 1200] loss: 0.124
[4 , 1500] loss: 0.110
[4 , 1800] loss: 0.104

Test set: Average loss: 0.0038, Accuracy: 9630/10000 (96%)

96.3
[5 ,  300] loss: 0.102
[5 ,  600] loss: 0.101
[5 ,  900] loss: 0.084
[5 , 1200] loss:

[32m[I 2021-04-28 15:55:12,676][0m Trial 99 finished with value: 97.7 and parameters: {'lr': 0.006793997188593686, 'l1': 493, 'l2': 326}. Best is trial 83 with value: 98.18.[0m



Test set: Average loss: 0.0025, Accuracy: 9770/10000 (98%)

97.7
