In [1]:
import os
import random
import pandas as pd
import numpy as np
import mxnet as mx
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as opt
from torch.utils.data import Dataset, DataLoader
from pytorch_metric_learning import losses
from einops import rearrange, repeat
import optuna
from optuna.trial import TrialState
from tqdm.notebook import tqdm

In [2]:
def file_to_embed(embeds, file):
    emb = []
    for f in file:
        emb.append(embeds[f][0])
    return torch.stack(emb)

In [3]:
MIN_NUM_PATCHES = 16

In [4]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

In [5]:
class AdienceDataset(Dataset):
    def __init__(self, annot_file, img_dir):
        self.img_lbls = pd.read_csv(annot_file, header=None)
        self.img_dir = img_dir
    
    def __len__(self):
        return len(self.img_lbls)
    
    def __getitem__(self, idx):
        img_file = self.img_lbls.iloc[idx, 0]
        img_path = os.path.join(self.img_dir, img_file)
        image = mx.image.imread(img_path)
        if image.shape[1] != 112:
            image = mx.image.resize_short(image, 112)
        image = mx.nd.transpose(image, axes=(2,0,1))
        image = torch.tensor(image.asnumpy()).type(torch.FloatTensor)
        label = self.img_lbls.iloc[idx, 1]

        return image, label, img_file

In [6]:
train_data = AdienceDataset("../train.csv", "../cropped_Adience/")
val_data = AdienceDataset("../val.csv", "../cropped_Adience/")

In [7]:
class CosFace(nn.Module):
    r"""Implement of CosFace (https://arxiv.org/pdf/1801.09414.pdf):
    Args:
        in_features: size of each input sample
        out_features: size of each output sample
        device_id: the ID of GPU where the model will be trained by model parallel.
                       if device_id=None, it will be trained on CPU without model parallel.
        s: norm of input feature
        m: margin
        cos(theta)-m
    """

    def __init__(self, in_features, out_features, device_id, s=64.0, m=0.35):
        super(CosFace, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.device_id = device_id
        self.s = s
        self.m = m
        print("self.device_id", self.device_id)
        self.weight = nn.Parameter(torch.FloatTensor(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)

    def forward(self, input, label):
        # --------------------------- cos(theta) & phi(theta) ---------------------------

        if self.device_id == None:
            cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        else:
            x = input
            sub_weights = torch.chunk(self.weight, len(self.device_id), dim=0)
            temp_x = x.cuda(self.device_id[0])
            weight = sub_weights[0].cuda(self.device_id[0])
            cosine = F.linear(F.normalize(temp_x), F.normalize(weight))
            for i in range(1, len(self.device_id)):
                temp_x = x.cuda(self.device_id[i])
                weight = sub_weights[i].cuda(self.device_id[i])
                cosine = torch.cat((cosine, F.linear(F.normalize(temp_x), F.normalize(weight)).cuda(self.device_id[0])),
                                   dim=1)
        phi = cosine - self.m
        # --------------------------- convert label to one-hot ---------------------------
        one_hot = torch.zeros(cosine.size())
        if self.device_id != None:
            one_hot = one_hot.cuda(self.device_id[0])
        # one_hot = one_hot.cuda() if cosine.is_cuda else one_hot

        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        # -------------torch.where(out_i = {x_i if condition_i else y_i) -------------
        output = (one_hot * phi) + (
                    (1.0 - one_hot) * cosine)  # you can use torch.where if your torch.__version__ is 0.4
        output *= self.s

        return output

    def __repr__(self):
        return self.__class__.__name__ + '(' \
               + 'in_features = ' + str(self.in_features) \
               + ', out_features = ' + str(self.out_features) \
               + ', s = ' + str(self.s) \
               + ', m = ' + str(self.m) + ')'

In [8]:
class Residual(nn.Module):
    def __init__(self, fn):
        super().__init__()
        self.fn = fn
    def forward(self, x, **kwargs):
        return self.fn(x, **kwargs) + x

In [9]:
class PreNorm(nn.Module):
    def __init__(self, dim, fn):
        super().__init__()
        self.norm = nn.LayerNorm(dim)
        self.fn = fn
    def forward(self, x, **kwargs):
        return self.fn(self.norm(x), **kwargs)

In [10]:
class FeedForward(nn.Module):
    def __init__(self, dim, hidden_dim, dropout = 0.):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(dim, hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, dim),
            nn.Dropout(dropout)
        )
    def forward(self, x):
        return self.net(x)

In [11]:
class Attention(nn.Module):
    def __init__(self, dim, heads = 8, dim_head = 64, dropout = 0.):
        super().__init__()
        inner_dim = dim_head *  heads
        self.heads = heads
        self.scale = dim ** -0.5

        self.to_qkv = nn.Linear(dim, inner_dim * 3, bias = False)
        self.to_out = nn.Sequential(
            nn.Linear(inner_dim, dim),
            nn.Dropout(dropout)
        )

    def forward(self, x, mask = None):
        b, n, _, h = *x.shape, self.heads
        qkv = self.to_qkv(x).chunk(3, dim = -1)

        q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> b h n d', h = h), qkv)
        dots = torch.einsum('bhid,bhjd->bhij', q, k) * self.scale
        mask_value = -torch.finfo(dots.dtype).max
        #embed()
        if mask is not None:
            mask = F.pad(mask.flatten(1), (1, 0), value = True)
            assert mask.shape[-1] == dots.shape[-1], 'mask has incorrect dimensions'
            mask = mask[:, None, :] * mask[:, :, None]
            dots.masked_fill_(~mask, mask_value)
            del mask

        attn = dots.softmax(dim=-1)

        out = torch.einsum('bhij,bhjd->bhid', attn, v)
        out = rearrange(out, 'b h n d -> b n (h d)')
        out =  self.to_out(out)

        return out

In [12]:
class Transformer(nn.Module):
    def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout):
        super().__init__()
        self.layers = nn.ModuleList([])
        for _ in range(depth):
            self.layers.append(nn.ModuleList([
                Residual(PreNorm(dim, Attention(dim, heads = heads, dim_head = dim_head, dropout = dropout))),
                Residual(PreNorm(dim, FeedForward(dim, mlp_dim, dropout = dropout)))
            ]))
    def forward(self, x, mask = None):
        for attn, ff in self.layers:
            x = attn(x, mask = mask)
            #embed()
            x = ff(x)
        return x

In [13]:
class ViTs_face(nn.Module):
    def __init__(self, *, loss_type, GPU_ID, num_class, image_size, patch_size, ac_patch_size,
                         pad, dim, depth, heads, mlp_dim, pool = 'mean', channels = 3, dim_head = 64, dropout = 0., emb_dropout = 0.):
        super().__init__()
        assert image_size % patch_size == 0, 'Image dimensions must be divisible by the patch size.'
        num_patches = (image_size // patch_size) ** 2
        patch_dim = channels * ac_patch_size ** 2
        assert num_patches > MIN_NUM_PATCHES, f'your number of patches ({num_patches}) is way too small for attention to be effective (at least 16). Try decreasing your patch size'
        assert pool in {'cls', 'mean'}, 'pool type must be either cls (cls token) or mean (mean pooling)'

        self.patch_size = patch_size
        self.soft_split = nn.Unfold(kernel_size=(ac_patch_size, ac_patch_size), stride=(self.patch_size, self.patch_size), padding=(pad, pad))


        self.pos_embedding = nn.Parameter(torch.randn(1, num_patches + 1, dim))
        self.patch_to_embedding = nn.Linear(patch_dim, dim)
        self.cls_token = nn.Parameter(torch.randn(1, 1, dim))
        self.dropout = nn.Dropout(emb_dropout)

        self.transformer = Transformer(dim, depth, heads, dim_head, mlp_dim, dropout)

        self.pool = pool
        self.to_latent = nn.Identity()

        self.mlp_head = nn.Sequential(
            nn.LayerNorm(dim),
        )
        self.loss_type = loss_type
        self.GPU_ID = GPU_ID
        if self.loss_type == 'None':
            print("no loss for vit_face")
        else:
            if self.loss_type == 'Softmax':
                self.loss = Softmax(in_features=dim, out_features=num_class, device_id=self.GPU_ID)
            elif self.loss_type == 'CosFace':
                self.loss = CosFace(in_features=dim, out_features=num_class, device_id=self.GPU_ID)
            elif self.loss_type == 'ArcFace':
                self.loss = ArcFace(in_features=dim, out_features=num_class, device_id=self.GPU_ID)
            elif self.loss_type == 'SFace':
                self.loss = SFaceLoss(in_features=dim, out_features=num_class, device_id=self.GPU_ID)

    def forward(self, img, label= None , mask = None):
        p = self.patch_size
        x = self.soft_split(img).transpose(1, 2)
        x = self.patch_to_embedding(x)
        b, n, _ = x.shape

        cls_tokens = repeat(self.cls_token, '() n d -> b n d', b = b)
        x = torch.cat((cls_tokens, x), dim=1)
        x += self.pos_embedding[:, :(n + 1)]
        x = self.dropout(x)
        x = self.transformer(x, mask)

        y = x[:, 0]
        z = x[:, 1:].mean(dim = 1)

        y = self.to_latent(y)
        emb_y = self.mlp_head(y)
        z = self.to_latent(z)
        emb_z = self.mlp_head(z)
        emb = torch.cat((emb_y, emb_z), dim=1)
        if label is not None:
            x = self.loss(emb, label)
            return x, emb
        else:
            return emb

In [14]:
class ViT_plus(nn.Module):
    def __init__(self):
        super(ViT_plus, self).__init__()
        
        self.fc1 = nn.Linear(in_features=1024, out_features=1024)
        self.fc2 = nn.Linear(in_features=1024, out_features=2)
        
    def forward(self, x):
        x = self.fc1(x)
        x_cosface = x
        x_classification = self.fc2(x)
        
        return x_cosface, x_classification

In [15]:
model = ViTs_face(
            loss_type='CosFace',
            GPU_ID=[device],
            num_class=93431,
            image_size=112,
            patch_size=8,
            ac_patch_size=12,
            pad=4,
            dim=512,
            depth=20,
            heads=8,
            mlp_dim=2048,
            dropout=0.1,
            emb_dropout=0.1
        ).to(device)
model.load_state_dict(
    torch.load("../Face-Transformer/results/ViT-P12S8_ms1m_cosface/Backbone_VITs_Epoch_2_Batch_12000_Time_2021-03-17-04-05_checkpoint.pth", map_location=device)
)

self.device_id [device(type='cuda', index=0)]


<All keys matched successfully>

In [16]:
for param in model.parameters():
    param.requires_grad = False

In [17]:
embeds = {}
model.eval()

with torch.no_grad():
    for img, _, file in train_data:
        img = img.to(device)
        embeds[file] = model(torch.unsqueeze(img, 0))

    for img, _, file in val_data:
        img = img.to(device)
        embeds[file] = model(torch.unsqueeze(img, 0))

In [18]:
best_accu = 0.0
def objective(trial):
    model_xtr = ViT_plus().to(device)
    
    loss_lr = trial.suggest_float("loss_learning_rate", 1e-4, 1e-2, log=True)
    cos_margin = losses.CosFaceLoss(2, 1024).to(device)
    loss_optimizer = opt.AdamW(cos_margin.parameters(), lr=loss_lr)
    
    lr = trial.suggest_float("learning_rate", 1e-5, 1e-1, log=True)
    wd = trial.suggest_float('weight_decay', 1e-4, 1e-2, log=True)
    eps = trial.suggest_float("epsilon", 1e-9, 1e-7, log=True)
    optimizer = opt.AdamW(model_xtr.parameters(), lr=lr, eps=eps, weight_decay=wd)
    
    criterion = nn.CrossEntropyLoss()
    
    batch_size = trial.suggest_int('batch_size', 50, 300)
    num_epochs = trial.suggest_int('epochs', 10, 100)
    
    print("Learning rate for Loss: "+ str(loss_lr))
    print("Learning rate: "+ str(lr))
    print("Weight decay: "+ str(wd))
    print("Epsilon: "+ str(eps))
    print("Batch size: "+ str(batch_size))
    print("Number of epochs: "+ str(num_epochs))
    
    for epoch in tqdm(range(num_epochs), desc="Epochs"):
        train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=4)
        val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False, num_workers=4)
        
        # training loop
        running_loss = []
        running_accu = []
        
        model_xtr.train()
        for img, label, file in tqdm(train_loader, desc="Training", leave=False):
            img, label = img.to(device), label.to(device)

            x = file_to_embed(embeds, file)
            
            optimizer.zero_grad()
            embed, output = model_xtr(x)
            
            pred = torch.argmax(output, 1)
            accuracy = torch.eq(pred, label).sum() / len(img)

            class_loss = criterion(output, label)
            cos_loss = cos_margin(embed, label)
            loss = class_loss + cos_loss
            loss.backward()
            loss_optimizer.step()
            optimizer.step()

            running_accu.append(accuracy.cpu().detach().numpy())
            running_loss.append(loss.cpu().detach().numpy())
        print("Epoch: {}/{} - Loss: {:.4f} - Accuracy: {:.4f}".format(epoch+1, num_epochs, np.mean(running_loss), np.mean(running_accu)))
        
        # validation loop
        val_loss = []
        val_accu = []

        model_xtr.eval()
        with torch.no_grad():
            for img, label, file in tqdm(val_loader):
                img, label = img.to(device), label.to(device)
                
                x = file_to_embed(embeds, file)
                
                embed, output = model_xtr(x)
                
                pred = torch.argmax(output, 1)
                accuracy = torch.eq(pred, label).sum() / len(img)
                
                class_loss = criterion(output, label)
                cos_loss = cos_margin(embed, label)
                loss = class_loss + cos_loss
                
                val_accu.append(accuracy.cpu().detach().numpy())
                val_loss.append(loss.cpu().detach().numpy())
        val_accu = np.mean(val_accu)
        val_loss = np.mean(val_loss)
        print("Val Loss: {:.4f} - Val Accuracy: {:.4f}".format(val_loss, val_accu))
        
        trial.report(val_accu, epoch)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()
    
    global best_accu
    if val_accu > best_accu:
        best_accu = val_accu
        print("Saving best model...")
        torch.save(model_xtr.state_dict(), "../vit_12-8_cosface_mean.pt")
            
    return val_accu

In [19]:
study = optuna.create_study(direction='maximize',
                            study_name='cosface-12-8-mean-vit-study',
                            storage='sqlite:///study2.db',
                            load_if_exists=True)
study.optimize(objective, n_trials=20)

pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

# Display the study statistics
print("\nStudy statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

[32m[I 2023-12-08 01:45:02,513][0m A new study created in RDB with name: cosface-12-8-mean-vit-study[0m


Learning rate for Loss: 0.006606113390840052
Learning rate: 1.8390958150396713e-05
Weight decay: 0.0004158910861118078
Epsilon: 1.682072836926719e-08
Batch size: 259
Number of epochs: 40


Epochs:   0%|          | 0/40 [00:00<?, ?it/s]

Training:   0%|          | 0/54 [00:00<?, ?it/s]

Epoch: 1/40 - Loss: 19.5066 - Accuracy: 0.5942


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 15.4987 - Val Accuracy: 0.6847


Training:   0%|          | 0/54 [00:00<?, ?it/s]

Epoch: 2/40 - Loss: 11.4730 - Accuracy: 0.7479


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 7.5023 - Val Accuracy: 0.8242


Training:   0%|          | 0/54 [00:00<?, ?it/s]

Epoch: 3/40 - Loss: 6.3540 - Accuracy: 0.8641


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 5.0959 - Val Accuracy: 0.8982


Training:   0%|          | 0/54 [00:00<?, ?it/s]

Epoch: 4/40 - Loss: 4.8172 - Accuracy: 0.9116


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 4.2052 - Val Accuracy: 0.9331


Training:   0%|          | 0/54 [00:00<?, ?it/s]

Epoch: 5/40 - Loss: 4.1453 - Accuracy: 0.9309


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 3.7621 - Val Accuracy: 0.9399


Training:   0%|          | 0/54 [00:00<?, ?it/s]

Epoch: 6/40 - Loss: 3.7520 - Accuracy: 0.9409


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 3.4858 - Val Accuracy: 0.9493


Training:   0%|          | 0/54 [00:00<?, ?it/s]

Epoch: 7/40 - Loss: 3.4841 - Accuracy: 0.9452


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 3.2932 - Val Accuracy: 0.9536


Training:   0%|          | 0/54 [00:00<?, ?it/s]

Epoch: 8/40 - Loss: 3.2745 - Accuracy: 0.9485


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 3.1642 - Val Accuracy: 0.9558


Training:   0%|          | 0/54 [00:00<?, ?it/s]

Epoch: 9/40 - Loss: 3.1265 - Accuracy: 0.9506


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 3.0357 - Val Accuracy: 0.9562


Training:   0%|          | 0/54 [00:00<?, ?it/s]

Epoch: 10/40 - Loss: 2.9887 - Accuracy: 0.9529


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 2.9154 - Val Accuracy: 0.9578


Training:   0%|          | 0/54 [00:00<?, ?it/s]

Epoch: 11/40 - Loss: 2.8731 - Accuracy: 0.9545


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 2.8453 - Val Accuracy: 0.9595


Training:   0%|          | 0/54 [00:00<?, ?it/s]

Epoch: 12/40 - Loss: 2.7818 - Accuracy: 0.9554


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 2.7775 - Val Accuracy: 0.9606


Training:   0%|          | 0/54 [00:00<?, ?it/s]

Epoch: 13/40 - Loss: 2.6955 - Accuracy: 0.9567


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 2.7254 - Val Accuracy: 0.9611


Training:   0%|          | 0/54 [00:00<?, ?it/s]

Epoch: 14/40 - Loss: 2.6346 - Accuracy: 0.9578


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 2.6624 - Val Accuracy: 0.9617


Training:   0%|          | 0/54 [00:00<?, ?it/s]

Epoch: 15/40 - Loss: 2.5641 - Accuracy: 0.9585


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 2.6199 - Val Accuracy: 0.9608


Training:   0%|          | 0/54 [00:00<?, ?it/s]

Epoch: 16/40 - Loss: 2.5191 - Accuracy: 0.9592


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 2.5746 - Val Accuracy: 0.9619


Training:   0%|          | 0/54 [00:00<?, ?it/s]

Epoch: 17/40 - Loss: 2.4569 - Accuracy: 0.9594


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 2.5526 - Val Accuracy: 0.9606


Training:   0%|          | 0/54 [00:00<?, ?it/s]

Epoch: 18/40 - Loss: 2.4136 - Accuracy: 0.9601


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 2.5237 - Val Accuracy: 0.9617


Training:   0%|          | 0/54 [00:00<?, ?it/s]

Epoch: 19/40 - Loss: 2.3680 - Accuracy: 0.9608


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 2.4925 - Val Accuracy: 0.9622


Training:   0%|          | 0/54 [00:00<?, ?it/s]

Epoch: 20/40 - Loss: 2.3345 - Accuracy: 0.9608


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 2.4819 - Val Accuracy: 0.9602


Training:   0%|          | 0/54 [00:00<?, ?it/s]

Epoch: 21/40 - Loss: 2.3070 - Accuracy: 0.9616


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 2.4554 - Val Accuracy: 0.9628


Training:   0%|          | 0/54 [00:00<?, ?it/s]

Epoch: 22/40 - Loss: 2.2764 - Accuracy: 0.9626


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 2.4525 - Val Accuracy: 0.9628


Training:   0%|          | 0/54 [00:00<?, ?it/s]

Epoch: 23/40 - Loss: 2.2354 - Accuracy: 0.9629


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 2.4179 - Val Accuracy: 0.9628


Training:   0%|          | 0/54 [00:00<?, ?it/s]

Epoch: 24/40 - Loss: 2.2064 - Accuracy: 0.9637


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 2.4001 - Val Accuracy: 0.9617


Training:   0%|          | 0/54 [00:00<?, ?it/s]

Epoch: 25/40 - Loss: 2.1780 - Accuracy: 0.9637


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 2.3993 - Val Accuracy: 0.9633


Training:   0%|          | 0/54 [00:00<?, ?it/s]

Epoch: 26/40 - Loss: 2.1523 - Accuracy: 0.9641


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 2.3669 - Val Accuracy: 0.9633


Training:   0%|          | 0/54 [00:00<?, ?it/s]

Epoch: 27/40 - Loss: 2.1333 - Accuracy: 0.9642


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 2.3611 - Val Accuracy: 0.9622


Training:   0%|          | 0/54 [00:00<?, ?it/s]

Epoch: 28/40 - Loss: 2.1038 - Accuracy: 0.9649


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 2.3577 - Val Accuracy: 0.9633


Training:   0%|          | 0/54 [00:00<?, ?it/s]

Epoch: 29/40 - Loss: 2.0812 - Accuracy: 0.9645


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 2.3441 - Val Accuracy: 0.9628


Training:   0%|          | 0/54 [00:00<?, ?it/s]

Epoch: 30/40 - Loss: 2.0671 - Accuracy: 0.9656


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 2.3212 - Val Accuracy: 0.9626


Training:   0%|          | 0/54 [00:00<?, ?it/s]

Epoch: 31/40 - Loss: 2.0420 - Accuracy: 0.9649


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 2.3193 - Val Accuracy: 0.9633


Training:   0%|          | 0/54 [00:00<?, ?it/s]

Epoch: 32/40 - Loss: 2.0270 - Accuracy: 0.9658


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 2.2921 - Val Accuracy: 0.9626


Training:   0%|          | 0/54 [00:00<?, ?it/s]

Epoch: 33/40 - Loss: 2.0077 - Accuracy: 0.9666


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 2.3179 - Val Accuracy: 0.9606


Training:   0%|          | 0/54 [00:00<?, ?it/s]

Epoch: 34/40 - Loss: 1.9859 - Accuracy: 0.9665


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 2.2916 - Val Accuracy: 0.9622


Training:   0%|          | 0/54 [00:00<?, ?it/s]

Epoch: 35/40 - Loss: 1.9633 - Accuracy: 0.9667


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 2.3030 - Val Accuracy: 0.9606


Training:   0%|          | 0/54 [00:00<?, ?it/s]

Epoch: 36/40 - Loss: 1.9544 - Accuracy: 0.9670


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 2.3021 - Val Accuracy: 0.9632


Training:   0%|          | 0/54 [00:00<?, ?it/s]

Epoch: 37/40 - Loss: 1.9348 - Accuracy: 0.9675


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 2.2646 - Val Accuracy: 0.9617


Training:   0%|          | 0/54 [00:00<?, ?it/s]

Epoch: 38/40 - Loss: 1.9214 - Accuracy: 0.9674


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 2.2860 - Val Accuracy: 0.9606


Training:   0%|          | 0/54 [00:00<?, ?it/s]

Epoch: 39/40 - Loss: 1.9074 - Accuracy: 0.9672


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 2.2549 - Val Accuracy: 0.9622


Training:   0%|          | 0/54 [00:00<?, ?it/s]

Epoch: 40/40 - Loss: 1.8886 - Accuracy: 0.9688


  0%|          | 0/7 [00:00<?, ?it/s]

[32m[I 2023-12-08 01:54:02,318][0m Trial 0 finished with value: 0.9622216820716858 and parameters: {'loss_learning_rate': 0.006606113390840052, 'learning_rate': 1.8390958150396713e-05, 'weight_decay': 0.0004158910861118078, 'epsilon': 1.682072836926719e-08, 'batch_size': 259, 'epochs': 40}. Best is trial 0 with value: 0.9622216820716858.[0m


Val Loss: 2.2400 - Val Accuracy: 0.9622
Saving best model...
Learning rate for Loss: 0.0001428516512559449
Learning rate: 0.00011530676543166945
Weight decay: 0.003251563095484147
Epsilon: 4.709214560475621e-08
Batch size: 170
Number of epochs: 99


Epochs:   0%|          | 0/99 [00:00<?, ?it/s]

Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 1/99 - Loss: 9.2481 - Accuracy: 0.8321


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 4.0028 - Val Accuracy: 0.9475


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 2/99 - Loss: 3.4894 - Accuracy: 0.9462


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 3.1247 - Val Accuracy: 0.9576


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 3/99 - Loss: 2.8400 - Accuracy: 0.9552


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.8097 - Val Accuracy: 0.9566


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 4/99 - Loss: 2.5262 - Accuracy: 0.9592


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.6292 - Val Accuracy: 0.9593


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 5/99 - Loss: 2.3244 - Accuracy: 0.9625


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.6672 - Val Accuracy: 0.9518


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 6/99 - Loss: 2.2112 - Accuracy: 0.9656


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.4534 - Val Accuracy: 0.9572


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 7/99 - Loss: 2.0683 - Accuracy: 0.9663


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.4081 - Val Accuracy: 0.9572


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 8/99 - Loss: 2.0181 - Accuracy: 0.9677


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.3986 - Val Accuracy: 0.9561


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 9/99 - Loss: 1.9197 - Accuracy: 0.9689


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.3546 - Val Accuracy: 0.9592


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 10/99 - Loss: 1.8090 - Accuracy: 0.9710


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.3304 - Val Accuracy: 0.9581


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 11/99 - Loss: 1.7648 - Accuracy: 0.9714


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.3107 - Val Accuracy: 0.9602


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 12/99 - Loss: 1.7154 - Accuracy: 0.9714


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.3358 - Val Accuracy: 0.9586


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 13/99 - Loss: 1.6579 - Accuracy: 0.9722


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.3460 - Val Accuracy: 0.9604


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 14/99 - Loss: 1.6258 - Accuracy: 0.9731


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.2415 - Val Accuracy: 0.9602


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 15/99 - Loss: 1.5848 - Accuracy: 0.9719


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.3127 - Val Accuracy: 0.9588


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 16/99 - Loss: 1.5631 - Accuracy: 0.9728


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.2657 - Val Accuracy: 0.9582


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 17/99 - Loss: 1.5308 - Accuracy: 0.9736


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.3415 - Val Accuracy: 0.9597


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 18/99 - Loss: 1.5063 - Accuracy: 0.9729


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.2717 - Val Accuracy: 0.9597


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 19/99 - Loss: 1.4647 - Accuracy: 0.9743


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.2382 - Val Accuracy: 0.9593


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 20/99 - Loss: 1.4531 - Accuracy: 0.9748


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.3056 - Val Accuracy: 0.9593


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 21/99 - Loss: 1.4533 - Accuracy: 0.9752


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.1794 - Val Accuracy: 0.9629


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 22/99 - Loss: 1.3821 - Accuracy: 0.9763


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.2955 - Val Accuracy: 0.9602


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 23/99 - Loss: 1.3488 - Accuracy: 0.9765


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.2562 - Val Accuracy: 0.9566


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 24/99 - Loss: 1.3440 - Accuracy: 0.9768


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.2469 - Val Accuracy: 0.9588


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 25/99 - Loss: 1.3182 - Accuracy: 0.9771


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.3958 - Val Accuracy: 0.9570


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 26/99 - Loss: 1.3452 - Accuracy: 0.9767


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.1856 - Val Accuracy: 0.9604


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 27/99 - Loss: 1.3619 - Accuracy: 0.9758


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.1930 - Val Accuracy: 0.9588


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 28/99 - Loss: 1.2878 - Accuracy: 0.9777


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.2957 - Val Accuracy: 0.9576


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 29/99 - Loss: 1.2518 - Accuracy: 0.9772


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.2839 - Val Accuracy: 0.9586


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 30/99 - Loss: 1.2292 - Accuracy: 0.9783


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.3627 - Val Accuracy: 0.9566


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 31/99 - Loss: 1.2370 - Accuracy: 0.9777


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.3146 - Val Accuracy: 0.9588


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 32/99 - Loss: 1.2293 - Accuracy: 0.9772


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.3278 - Val Accuracy: 0.9604


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 33/99 - Loss: 1.2356 - Accuracy: 0.9770


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.2773 - Val Accuracy: 0.9609


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 34/99 - Loss: 1.1981 - Accuracy: 0.9780


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.3042 - Val Accuracy: 0.9572


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 35/99 - Loss: 1.2238 - Accuracy: 0.9781


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.3068 - Val Accuracy: 0.9572


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 36/99 - Loss: 1.2240 - Accuracy: 0.9769


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.2463 - Val Accuracy: 0.9582


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 37/99 - Loss: 1.1642 - Accuracy: 0.9792


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.2518 - Val Accuracy: 0.9620


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 38/99 - Loss: 1.1886 - Accuracy: 0.9778


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.3039 - Val Accuracy: 0.9577


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 39/99 - Loss: 1.1538 - Accuracy: 0.9778


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.2765 - Val Accuracy: 0.9556


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 40/99 - Loss: 1.1382 - Accuracy: 0.9782


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.3683 - Val Accuracy: 0.9577


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 41/99 - Loss: 1.1118 - Accuracy: 0.9795


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.3689 - Val Accuracy: 0.9582


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 42/99 - Loss: 1.0827 - Accuracy: 0.9793


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.2819 - Val Accuracy: 0.9609


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 43/99 - Loss: 1.1155 - Accuracy: 0.9784


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.3260 - Val Accuracy: 0.9593


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 44/99 - Loss: 1.0863 - Accuracy: 0.9787


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.3315 - Val Accuracy: 0.9588


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 45/99 - Loss: 1.0858 - Accuracy: 0.9801


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.3083 - Val Accuracy: 0.9604


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 46/99 - Loss: 1.1056 - Accuracy: 0.9799


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.3688 - Val Accuracy: 0.9604


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 47/99 - Loss: 1.0600 - Accuracy: 0.9792


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.4410 - Val Accuracy: 0.9588


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 48/99 - Loss: 1.0545 - Accuracy: 0.9806


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.3885 - Val Accuracy: 0.9598


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 49/99 - Loss: 1.0383 - Accuracy: 0.9798


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.4370 - Val Accuracy: 0.9614


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 50/99 - Loss: 1.0419 - Accuracy: 0.9806


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.4280 - Val Accuracy: 0.9602


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 51/99 - Loss: 1.0269 - Accuracy: 0.9799


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.4724 - Val Accuracy: 0.9566


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 52/99 - Loss: 1.0347 - Accuracy: 0.9796


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.3555 - Val Accuracy: 0.9614


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 53/99 - Loss: 1.0618 - Accuracy: 0.9784


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.4120 - Val Accuracy: 0.9609


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 54/99 - Loss: 1.0085 - Accuracy: 0.9807


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.3907 - Val Accuracy: 0.9598


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 55/99 - Loss: 0.9872 - Accuracy: 0.9807


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.3574 - Val Accuracy: 0.9593


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 56/99 - Loss: 0.9922 - Accuracy: 0.9821


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.4064 - Val Accuracy: 0.9604


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 57/99 - Loss: 0.9933 - Accuracy: 0.9806


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.5158 - Val Accuracy: 0.9582


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 58/99 - Loss: 0.9849 - Accuracy: 0.9805


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.4893 - Val Accuracy: 0.9557


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 59/99 - Loss: 0.9740 - Accuracy: 0.9806


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.5140 - Val Accuracy: 0.9588


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 60/99 - Loss: 1.0224 - Accuracy: 0.9798


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.3724 - Val Accuracy: 0.9614


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 61/99 - Loss: 0.9889 - Accuracy: 0.9802


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.4069 - Val Accuracy: 0.9598


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 62/99 - Loss: 0.9901 - Accuracy: 0.9805


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.4783 - Val Accuracy: 0.9589


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 63/99 - Loss: 0.9564 - Accuracy: 0.9812


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.4414 - Val Accuracy: 0.9614


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 64/99 - Loss: 1.0056 - Accuracy: 0.9801


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.4618 - Val Accuracy: 0.9604


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 65/99 - Loss: 1.0254 - Accuracy: 0.9790


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.4505 - Val Accuracy: 0.9614


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 66/99 - Loss: 0.9114 - Accuracy: 0.9809


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.3919 - Val Accuracy: 0.9625


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 67/99 - Loss: 0.9104 - Accuracy: 0.9818


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.4077 - Val Accuracy: 0.9593


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 68/99 - Loss: 0.9621 - Accuracy: 0.9805


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.4288 - Val Accuracy: 0.9604


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 69/99 - Loss: 0.9375 - Accuracy: 0.9815


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.4338 - Val Accuracy: 0.9609


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 70/99 - Loss: 0.9787 - Accuracy: 0.9794


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.4621 - Val Accuracy: 0.9582


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 71/99 - Loss: 0.8944 - Accuracy: 0.9815


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.4575 - Val Accuracy: 0.9614


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 72/99 - Loss: 0.9046 - Accuracy: 0.9813


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.4766 - Val Accuracy: 0.9582


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 73/99 - Loss: 0.9332 - Accuracy: 0.9803


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.4613 - Val Accuracy: 0.9578


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 74/99 - Loss: 0.8801 - Accuracy: 0.9815


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.4925 - Val Accuracy: 0.9614


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 75/99 - Loss: 0.8613 - Accuracy: 0.9823


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.5592 - Val Accuracy: 0.9600


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 76/99 - Loss: 0.8574 - Accuracy: 0.9819


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.4388 - Val Accuracy: 0.9609


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 77/99 - Loss: 0.8659 - Accuracy: 0.9816


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.4631 - Val Accuracy: 0.9609


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 78/99 - Loss: 0.8661 - Accuracy: 0.9819


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.4172 - Val Accuracy: 0.9636


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 79/99 - Loss: 0.8617 - Accuracy: 0.9814


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.4791 - Val Accuracy: 0.9630


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 80/99 - Loss: 0.8609 - Accuracy: 0.9818


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.4733 - Val Accuracy: 0.9609


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 81/99 - Loss: 0.8477 - Accuracy: 0.9829


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.4427 - Val Accuracy: 0.9636


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 82/99 - Loss: 0.8960 - Accuracy: 0.9818


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.5151 - Val Accuracy: 0.9598


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 83/99 - Loss: 0.8626 - Accuracy: 0.9821


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.5827 - Val Accuracy: 0.9578


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 84/99 - Loss: 0.8500 - Accuracy: 0.9817


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.4060 - Val Accuracy: 0.9630


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 85/99 - Loss: 0.8168 - Accuracy: 0.9825


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.4369 - Val Accuracy: 0.9635


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 86/99 - Loss: 0.8318 - Accuracy: 0.9819


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.4712 - Val Accuracy: 0.9609


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 87/99 - Loss: 0.8479 - Accuracy: 0.9808


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.4599 - Val Accuracy: 0.9609


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 88/99 - Loss: 0.8148 - Accuracy: 0.9836


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.4402 - Val Accuracy: 0.9588


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 89/99 - Loss: 0.8085 - Accuracy: 0.9829


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.6369 - Val Accuracy: 0.9577


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 90/99 - Loss: 0.8361 - Accuracy: 0.9833


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.4628 - Val Accuracy: 0.9609


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 91/99 - Loss: 0.8240 - Accuracy: 0.9812


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.4597 - Val Accuracy: 0.9614


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 92/99 - Loss: 0.8293 - Accuracy: 0.9814


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.5022 - Val Accuracy: 0.9630


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 93/99 - Loss: 0.8320 - Accuracy: 0.9821


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.5568 - Val Accuracy: 0.9625


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 94/99 - Loss: 0.9444 - Accuracy: 0.9789


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.4693 - Val Accuracy: 0.9620


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 95/99 - Loss: 0.8140 - Accuracy: 0.9825


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.5119 - Val Accuracy: 0.9583


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 96/99 - Loss: 0.7981 - Accuracy: 0.9824


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.5911 - Val Accuracy: 0.9551


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 97/99 - Loss: 0.8300 - Accuracy: 0.9816


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.4844 - Val Accuracy: 0.9609


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 98/99 - Loss: 0.7661 - Accuracy: 0.9841


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.5476 - Val Accuracy: 0.9578


Training:   0%|          | 0/83 [00:00<?, ?it/s]

Epoch: 99/99 - Loss: 0.8301 - Accuracy: 0.9810


  0%|          | 0/11 [00:00<?, ?it/s]

[32m[I 2023-12-08 02:15:40,004][0m Trial 1 finished with value: 0.9598336815834045 and parameters: {'loss_learning_rate': 0.0001428516512559449, 'learning_rate': 0.00011530676543166945, 'weight_decay': 0.003251563095484147, 'epsilon': 4.709214560475621e-08, 'batch_size': 170, 'epochs': 99}. Best is trial 0 with value: 0.9622216820716858.[0m


Val Loss: 2.4772 - Val Accuracy: 0.9598
Learning rate for Loss: 0.00010240472151520826
Learning rate: 3.9817323837093204e-05
Weight decay: 0.0005073897249758307
Epsilon: 8.473877844166362e-08
Batch size: 297
Number of epochs: 48


Epochs:   0%|          | 0/48 [00:00<?, ?it/s]

Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 1/48 - Loss: 18.4768 - Accuracy: 0.6493


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 13.3377 - Val Accuracy: 0.8185


Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 2/48 - Loss: 9.6927 - Accuracy: 0.8674


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 6.8352 - Val Accuracy: 0.9181


Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 3/48 - Loss: 6.1425 - Accuracy: 0.9220


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 5.0085 - Val Accuracy: 0.9411


Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 4/48 - Loss: 4.7675 - Accuracy: 0.9390


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 4.2437 - Val Accuracy: 0.9479


Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 5/48 - Loss: 4.1238 - Accuracy: 0.9436


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 3.7076 - Val Accuracy: 0.9526


Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 6/48 - Loss: 3.6557 - Accuracy: 0.9494


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 3.3926 - Val Accuracy: 0.9559


Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 7/48 - Loss: 3.3409 - Accuracy: 0.9527


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 3.1834 - Val Accuracy: 0.9560


Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 8/48 - Loss: 3.2171 - Accuracy: 0.9534


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 3.0483 - Val Accuracy: 0.9560


Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 9/48 - Loss: 3.0806 - Accuracy: 0.9559


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.9094 - Val Accuracy: 0.9594


Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 10/48 - Loss: 2.8279 - Accuracy: 0.9583


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.8352 - Val Accuracy: 0.9606


Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 11/48 - Loss: 2.6889 - Accuracy: 0.9598


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.7706 - Val Accuracy: 0.9594


Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 12/48 - Loss: 2.6087 - Accuracy: 0.9604


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.6542 - Val Accuracy: 0.9594


Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 13/48 - Loss: 2.5192 - Accuracy: 0.9619


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.6057 - Val Accuracy: 0.9599


Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 14/48 - Loss: 2.5060 - Accuracy: 0.9604


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.5409 - Val Accuracy: 0.9618


Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 15/48 - Loss: 2.4309 - Accuracy: 0.9623


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.5288 - Val Accuracy: 0.9616


Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 16/48 - Loss: 2.3565 - Accuracy: 0.9625


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.5034 - Val Accuracy: 0.9600


Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 17/48 - Loss: 2.3016 - Accuracy: 0.9649


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.4678 - Val Accuracy: 0.9600


Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 18/48 - Loss: 2.2548 - Accuracy: 0.9649


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.4311 - Val Accuracy: 0.9606


Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 19/48 - Loss: 2.2158 - Accuracy: 0.9645


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.4085 - Val Accuracy: 0.9600


Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 20/48 - Loss: 2.1355 - Accuracy: 0.9663


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.3853 - Val Accuracy: 0.9594


Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 21/48 - Loss: 2.1355 - Accuracy: 0.9660


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.3391 - Val Accuracy: 0.9618


Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 22/48 - Loss: 2.0886 - Accuracy: 0.9667


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.3433 - Val Accuracy: 0.9613


Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 23/48 - Loss: 2.0845 - Accuracy: 0.9670


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.3216 - Val Accuracy: 0.9589


Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 24/48 - Loss: 2.0197 - Accuracy: 0.9682


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.2777 - Val Accuracy: 0.9588


Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 25/48 - Loss: 2.0633 - Accuracy: 0.9674


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.3091 - Val Accuracy: 0.9618


Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 26/48 - Loss: 1.9401 - Accuracy: 0.9686


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.2798 - Val Accuracy: 0.9623


Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 27/48 - Loss: 1.9123 - Accuracy: 0.9693


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.2807 - Val Accuracy: 0.9606


Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 28/48 - Loss: 1.8916 - Accuracy: 0.9698


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.2311 - Val Accuracy: 0.9642


Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 29/48 - Loss: 1.9518 - Accuracy: 0.9683


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.2606 - Val Accuracy: 0.9606


Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 30/48 - Loss: 1.8997 - Accuracy: 0.9691


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.2257 - Val Accuracy: 0.9613


Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 31/48 - Loss: 1.8123 - Accuracy: 0.9707


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.2348 - Val Accuracy: 0.9613


Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 32/48 - Loss: 1.8411 - Accuracy: 0.9695


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.1957 - Val Accuracy: 0.9613


Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 33/48 - Loss: 1.7817 - Accuracy: 0.9718


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.2486 - Val Accuracy: 0.9606


Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 34/48 - Loss: 1.7836 - Accuracy: 0.9710


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.2010 - Val Accuracy: 0.9630


Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 35/48 - Loss: 1.7330 - Accuracy: 0.9717


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.1770 - Val Accuracy: 0.9653


Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 36/48 - Loss: 1.7207 - Accuracy: 0.9720


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.1637 - Val Accuracy: 0.9634


Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 37/48 - Loss: 1.8350 - Accuracy: 0.9692


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.1720 - Val Accuracy: 0.9634


Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 38/48 - Loss: 1.7131 - Accuracy: 0.9725


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.1685 - Val Accuracy: 0.9635


Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 39/48 - Loss: 1.6584 - Accuracy: 0.9724


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.1496 - Val Accuracy: 0.9629


Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 40/48 - Loss: 1.6766 - Accuracy: 0.9714


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.1533 - Val Accuracy: 0.9642


Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 41/48 - Loss: 1.6313 - Accuracy: 0.9732


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.1665 - Val Accuracy: 0.9630


Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 42/48 - Loss: 1.6162 - Accuracy: 0.9729


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.1547 - Val Accuracy: 0.9636


Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 43/48 - Loss: 1.6316 - Accuracy: 0.9731


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.1693 - Val Accuracy: 0.9618


Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 44/48 - Loss: 1.6678 - Accuracy: 0.9719


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.2090 - Val Accuracy: 0.9641


Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 45/48 - Loss: 1.5787 - Accuracy: 0.9736


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.1558 - Val Accuracy: 0.9623


Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 46/48 - Loss: 1.5628 - Accuracy: 0.9736


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.1375 - Val Accuracy: 0.9624


Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 47/48 - Loss: 1.6049 - Accuracy: 0.9723


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.1497 - Val Accuracy: 0.9634


Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 48/48 - Loss: 1.5533 - Accuracy: 0.9740


  0%|          | 0/6 [00:00<?, ?it/s]

[32m[I 2023-12-08 02:26:11,855][0m Trial 2 finished with value: 0.9623240828514099 and parameters: {'loss_learning_rate': 0.00010240472151520826, 'learning_rate': 3.9817323837093204e-05, 'weight_decay': 0.0005073897249758307, 'epsilon': 8.473877844166362e-08, 'batch_size': 297, 'epochs': 48}. Best is trial 2 with value: 0.9623240828514099.[0m


Val Loss: 2.1308 - Val Accuracy: 0.9623
Saving best model...
Learning rate for Loss: 0.0004970044944855519
Learning rate: 5.2275844188785164e-05
Weight decay: 0.001951592399367555
Epsilon: 2.416759231439704e-09
Batch size: 233
Number of epochs: 22


Epochs:   0%|          | 0/22 [00:00<?, ?it/s]

Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 1/22 - Loss: 16.0898 - Accuracy: 0.6552


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 8.7678 - Val Accuracy: 0.8676


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 2/22 - Loss: 6.3682 - Accuracy: 0.9108


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 4.6576 - Val Accuracy: 0.9435


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 3/22 - Loss: 4.2377 - Accuracy: 0.9402


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 3.6017 - Val Accuracy: 0.9516


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 4/22 - Loss: 3.4843 - Accuracy: 0.9492


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 3.1369 - Val Accuracy: 0.9532


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 5/22 - Loss: 3.0821 - Accuracy: 0.9530


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 2.8913 - Val Accuracy: 0.9543


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 6/22 - Loss: 2.7952 - Accuracy: 0.9564


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 2.6940 - Val Accuracy: 0.9575


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 7/22 - Loss: 2.6132 - Accuracy: 0.9589


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 2.6002 - Val Accuracy: 0.9559


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 8/22 - Loss: 2.4692 - Accuracy: 0.9604


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 2.4789 - Val Accuracy: 0.9602


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 9/22 - Loss: 2.3422 - Accuracy: 0.9620


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 2.4267 - Val Accuracy: 0.9623


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 10/22 - Loss: 2.2777 - Accuracy: 0.9623


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 2.4315 - Val Accuracy: 0.9607


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 11/22 - Loss: 2.1698 - Accuracy: 0.9642


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 2.3343 - Val Accuracy: 0.9612


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 12/22 - Loss: 2.1082 - Accuracy: 0.9658


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 2.4036 - Val Accuracy: 0.9585


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 13/22 - Loss: 2.0383 - Accuracy: 0.9667


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 2.2691 - Val Accuracy: 0.9624


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 14/22 - Loss: 2.0005 - Accuracy: 0.9673


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 2.2428 - Val Accuracy: 0.9623


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 15/22 - Loss: 1.9364 - Accuracy: 0.9684


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 2.2826 - Val Accuracy: 0.9623


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 16/22 - Loss: 1.8940 - Accuracy: 0.9702


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 2.2373 - Val Accuracy: 0.9618


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 17/22 - Loss: 1.8714 - Accuracy: 0.9694


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 2.2960 - Val Accuracy: 0.9591


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 18/22 - Loss: 1.8498 - Accuracy: 0.9702


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 2.2201 - Val Accuracy: 0.9602


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 19/22 - Loss: 1.7862 - Accuracy: 0.9710


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 2.1983 - Val Accuracy: 0.9645


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 20/22 - Loss: 1.7537 - Accuracy: 0.9710


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 2.2212 - Val Accuracy: 0.9629


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 21/22 - Loss: 1.7292 - Accuracy: 0.9713


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 2.1988 - Val Accuracy: 0.9639


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 22/22 - Loss: 1.7046 - Accuracy: 0.9716


  0%|          | 0/8 [00:00<?, ?it/s]

[32m[I 2023-12-08 02:30:33,742][0m Trial 3 finished with value: 0.9623286724090576 and parameters: {'loss_learning_rate': 0.0004970044944855519, 'learning_rate': 5.2275844188785164e-05, 'weight_decay': 0.001951592399367555, 'epsilon': 2.416759231439704e-09, 'batch_size': 233, 'epochs': 22}. Best is trial 3 with value: 0.9623286724090576.[0m


Val Loss: 2.1792 - Val Accuracy: 0.9623
Saving best model...
Learning rate for Loss: 0.0017895391447149327
Learning rate: 0.00014153084976650568
Weight decay: 0.00024977165046127336
Epsilon: 2.267509422396454e-08
Batch size: 77
Number of epochs: 58


Epochs:   0%|          | 0/58 [00:00<?, ?it/s]

Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 1/58 - Loss: 5.9912 - Accuracy: 0.8892


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.7374 - Val Accuracy: 0.9548


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 2/58 - Loss: 2.5947 - Accuracy: 0.9562


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.3800 - Val Accuracy: 0.9616


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 3/58 - Loss: 2.2737 - Accuracy: 0.9612


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.3742 - Val Accuracy: 0.9607


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 4/58 - Loss: 2.0953 - Accuracy: 0.9648


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.4892 - Val Accuracy: 0.9562


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 5/58 - Loss: 1.9325 - Accuracy: 0.9670


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.1615 - Val Accuracy: 0.9605


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 6/58 - Loss: 1.8801 - Accuracy: 0.9683


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.1423 - Val Accuracy: 0.9658


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 7/58 - Loss: 1.8248 - Accuracy: 0.9695


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.1558 - Val Accuracy: 0.9647


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 8/58 - Loss: 1.7462 - Accuracy: 0.9699


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.0892 - Val Accuracy: 0.9661


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 9/58 - Loss: 1.7128 - Accuracy: 0.9711


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.2679 - Val Accuracy: 0.9613


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 10/58 - Loss: 1.5841 - Accuracy: 0.9734


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.1878 - Val Accuracy: 0.9605


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 11/58 - Loss: 1.5896 - Accuracy: 0.9717


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.1517 - Val Accuracy: 0.9593


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 12/58 - Loss: 1.5591 - Accuracy: 0.9727


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.2477 - Val Accuracy: 0.9607


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 13/58 - Loss: 1.5332 - Accuracy: 0.9728


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.1712 - Val Accuracy: 0.9633


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 14/58 - Loss: 1.4792 - Accuracy: 0.9733


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.2792 - Val Accuracy: 0.9599


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 15/58 - Loss: 1.4911 - Accuracy: 0.9736


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.1179 - Val Accuracy: 0.9619


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 16/58 - Loss: 1.4151 - Accuracy: 0.9742


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.2052 - Val Accuracy: 0.9616


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 17/58 - Loss: 1.4549 - Accuracy: 0.9729


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.4785 - Val Accuracy: 0.9616


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 18/58 - Loss: 1.3663 - Accuracy: 0.9752


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.2554 - Val Accuracy: 0.9627


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 19/58 - Loss: 1.3925 - Accuracy: 0.9740


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.1733 - Val Accuracy: 0.9636


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 20/58 - Loss: 1.3584 - Accuracy: 0.9754


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.1492 - Val Accuracy: 0.9605


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 21/58 - Loss: 1.3748 - Accuracy: 0.9746


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.1749 - Val Accuracy: 0.9624


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 22/58 - Loss: 1.2872 - Accuracy: 0.9760


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.1990 - Val Accuracy: 0.9605


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 23/58 - Loss: 1.2657 - Accuracy: 0.9762


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.1114 - Val Accuracy: 0.9638


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 24/58 - Loss: 1.2699 - Accuracy: 0.9762


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.1046 - Val Accuracy: 0.9633


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 25/58 - Loss: 1.2383 - Accuracy: 0.9767


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.2427 - Val Accuracy: 0.9599


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 26/58 - Loss: 1.2208 - Accuracy: 0.9769


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.2361 - Val Accuracy: 0.9616


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 27/58 - Loss: 1.2387 - Accuracy: 0.9765


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.1857 - Val Accuracy: 0.9644


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 28/58 - Loss: 1.2249 - Accuracy: 0.9760


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.2447 - Val Accuracy: 0.9588


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 29/58 - Loss: 1.2322 - Accuracy: 0.9755


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.2342 - Val Accuracy: 0.9599


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 30/58 - Loss: 1.1651 - Accuracy: 0.9769


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.1753 - Val Accuracy: 0.9593


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 31/58 - Loss: 1.1558 - Accuracy: 0.9767


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.2261 - Val Accuracy: 0.9607


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 32/58 - Loss: 1.1561 - Accuracy: 0.9766


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.2075 - Val Accuracy: 0.9621


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 33/58 - Loss: 1.1477 - Accuracy: 0.9781


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.1614 - Val Accuracy: 0.9621


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 34/58 - Loss: 1.1387 - Accuracy: 0.9774


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.2015 - Val Accuracy: 0.9644


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 35/58 - Loss: 1.1305 - Accuracy: 0.9781


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.1481 - Val Accuracy: 0.9616


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 36/58 - Loss: 1.1410 - Accuracy: 0.9769


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.2268 - Val Accuracy: 0.9621


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 37/58 - Loss: 1.1369 - Accuracy: 0.9763


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.1493 - Val Accuracy: 0.9650


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 38/58 - Loss: 1.0762 - Accuracy: 0.9776


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.1999 - Val Accuracy: 0.9616


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 39/58 - Loss: 1.0730 - Accuracy: 0.9775


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.2586 - Val Accuracy: 0.9630


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 40/58 - Loss: 1.0718 - Accuracy: 0.9782


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.2875 - Val Accuracy: 0.9641


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 41/58 - Loss: 1.0444 - Accuracy: 0.9784


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.1825 - Val Accuracy: 0.9638


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 42/58 - Loss: 1.0331 - Accuracy: 0.9790


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.2567 - Val Accuracy: 0.9610


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 43/58 - Loss: 1.0524 - Accuracy: 0.9784


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.2425 - Val Accuracy: 0.9633


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 44/58 - Loss: 0.9952 - Accuracy: 0.9789


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.1936 - Val Accuracy: 0.9627


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 45/58 - Loss: 1.0379 - Accuracy: 0.9787


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.1654 - Val Accuracy: 0.9636


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 46/58 - Loss: 0.9876 - Accuracy: 0.9786


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.1561 - Val Accuracy: 0.9627


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 47/58 - Loss: 1.0278 - Accuracy: 0.9783


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.2655 - Val Accuracy: 0.9610


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 48/58 - Loss: 1.0288 - Accuracy: 0.9776


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.2716 - Val Accuracy: 0.9599


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 49/58 - Loss: 1.0005 - Accuracy: 0.9777


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.2638 - Val Accuracy: 0.9621


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 50/58 - Loss: 0.9856 - Accuracy: 0.9791


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.2690 - Val Accuracy: 0.9610


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 51/58 - Loss: 0.9769 - Accuracy: 0.9782


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.2299 - Val Accuracy: 0.9650


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 52/58 - Loss: 0.9826 - Accuracy: 0.9793


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.2188 - Val Accuracy: 0.9621


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 53/58 - Loss: 0.9735 - Accuracy: 0.9788


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.3103 - Val Accuracy: 0.9652


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 54/58 - Loss: 0.9992 - Accuracy: 0.9791


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.2352 - Val Accuracy: 0.9636


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 55/58 - Loss: 0.9507 - Accuracy: 0.9791


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.2102 - Val Accuracy: 0.9633


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 56/58 - Loss: 0.9332 - Accuracy: 0.9791


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.1764 - Val Accuracy: 0.9670


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 57/58 - Loss: 0.9445 - Accuracy: 0.9794


  0%|          | 0/23 [00:00<?, ?it/s]

Val Loss: 2.3453 - Val Accuracy: 0.9616


Training:   0%|          | 0/182 [00:00<?, ?it/s]

Epoch: 58/58 - Loss: 0.9339 - Accuracy: 0.9792


  0%|          | 0/23 [00:00<?, ?it/s]

[32m[I 2023-12-08 02:41:31,245][0m Trial 4 finished with value: 0.9615814089775085 and parameters: {'loss_learning_rate': 0.0017895391447149327, 'learning_rate': 0.00014153084976650568, 'weight_decay': 0.00024977165046127336, 'epsilon': 2.267509422396454e-08, 'batch_size': 77, 'epochs': 58}. Best is trial 3 with value: 0.9623286724090576.[0m


Val Loss: 2.3668 - Val Accuracy: 0.9616
Learning rate for Loss: 0.0007931984885076057
Learning rate: 2.7201280393223203e-05
Weight decay: 0.0022720513417521402
Epsilon: 5.347267838527074e-09
Batch size: 255
Number of epochs: 40


Epochs:   0%|          | 0/40 [00:00<?, ?it/s]

Training:   0%|          | 0/55 [00:00<?, ?it/s]

Epoch: 1/40 - Loss: 19.0193 - Accuracy: 0.5886


  0%|          | 0/7 [00:00<?, ?it/s]

[32m[I 2023-12-08 02:41:41,834][0m Trial 5 pruned. [0m


Val Loss: 14.9613 - Val Accuracy: 0.7010
Learning rate for Loss: 0.000782079191830132
Learning rate: 0.052323885244708865
Weight decay: 0.00015309102172715312
Epsilon: 1.169259363817395e-09
Batch size: 121
Number of epochs: 99


Epochs:   0%|          | 0/99 [00:00<?, ?it/s]

Training:   0%|          | 0/116 [00:00<?, ?it/s]

Epoch: 1/99 - Loss: 55.2095 - Accuracy: 0.8621


  0%|          | 0/15 [00:00<?, ?it/s]

[32m[I 2023-12-08 02:41:55,463][0m Trial 6 pruned. [0m


Val Loss: 13.2700 - Val Accuracy: 0.7639
Learning rate for Loss: 0.0011417595824279923
Learning rate: 0.0008881431649344765
Weight decay: 0.00023876820912612152
Epsilon: 6.681576717748813e-09
Batch size: 201
Number of epochs: 81


Epochs:   0%|          | 0/81 [00:00<?, ?it/s]

Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 1/81 - Loss: 4.6923 - Accuracy: 0.8848


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.4694 - Val Accuracy: 0.9598


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 2/81 - Loss: 2.4153 - Accuracy: 0.9582


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.3960 - Val Accuracy: 0.9520


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 3/81 - Loss: 2.2123 - Accuracy: 0.9599


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.4071 - Val Accuracy: 0.9590


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 4/81 - Loss: 2.2021 - Accuracy: 0.9598


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.2500 - Val Accuracy: 0.9636


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 5/81 - Loss: 1.9173 - Accuracy: 0.9650


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.3743 - Val Accuracy: 0.9595


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 6/81 - Loss: 1.8382 - Accuracy: 0.9659


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.2189 - Val Accuracy: 0.9628


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 7/81 - Loss: 1.8332 - Accuracy: 0.9655


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.1546 - Val Accuracy: 0.9633


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 8/81 - Loss: 1.8522 - Accuracy: 0.9652


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.2623 - Val Accuracy: 0.9636


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 9/81 - Loss: 1.6445 - Accuracy: 0.9678


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.1919 - Val Accuracy: 0.9628


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 10/81 - Loss: 1.6141 - Accuracy: 0.9696


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.4428 - Val Accuracy: 0.9558


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 11/81 - Loss: 1.5698 - Accuracy: 0.9693


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.4005 - Val Accuracy: 0.9589


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 12/81 - Loss: 1.4773 - Accuracy: 0.9712


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.3489 - Val Accuracy: 0.9606


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 13/81 - Loss: 1.5481 - Accuracy: 0.9681


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.3001 - Val Accuracy: 0.9606


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 14/81 - Loss: 1.4345 - Accuracy: 0.9723


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.3197 - Val Accuracy: 0.9609


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 15/81 - Loss: 1.4726 - Accuracy: 0.9700


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.3383 - Val Accuracy: 0.9614


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 16/81 - Loss: 1.4151 - Accuracy: 0.9710


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.2614 - Val Accuracy: 0.9631


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 17/81 - Loss: 1.4977 - Accuracy: 0.9692


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.3416 - Val Accuracy: 0.9592


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 18/81 - Loss: 1.4023 - Accuracy: 0.9713


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.1862 - Val Accuracy: 0.9641


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 19/81 - Loss: 1.4198 - Accuracy: 0.9710


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.5302 - Val Accuracy: 0.9619


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 20/81 - Loss: 1.4639 - Accuracy: 0.9702


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.3943 - Val Accuracy: 0.9617


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 21/81 - Loss: 1.2960 - Accuracy: 0.9729


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.1417 - Val Accuracy: 0.9622


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 22/81 - Loss: 1.2460 - Accuracy: 0.9739


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.1044 - Val Accuracy: 0.9652


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 23/81 - Loss: 1.2229 - Accuracy: 0.9737


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.1883 - Val Accuracy: 0.9633


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 24/81 - Loss: 1.2565 - Accuracy: 0.9741


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.1386 - Val Accuracy: 0.9611


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 25/81 - Loss: 1.2394 - Accuracy: 0.9734


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.3547 - Val Accuracy: 0.9625


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 26/81 - Loss: 1.2621 - Accuracy: 0.9731


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.1149 - Val Accuracy: 0.9647


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 27/81 - Loss: 1.2539 - Accuracy: 0.9726


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.3537 - Val Accuracy: 0.9619


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 28/81 - Loss: 1.1404 - Accuracy: 0.9755


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.0837 - Val Accuracy: 0.9631


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 29/81 - Loss: 1.2617 - Accuracy: 0.9717


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.1751 - Val Accuracy: 0.9628


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 30/81 - Loss: 1.2018 - Accuracy: 0.9740


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.2048 - Val Accuracy: 0.9622


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 31/81 - Loss: 1.1402 - Accuracy: 0.9752


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.1477 - Val Accuracy: 0.9636


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 32/81 - Loss: 1.1309 - Accuracy: 0.9752


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.1954 - Val Accuracy: 0.9608


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 33/81 - Loss: 1.1077 - Accuracy: 0.9760


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.1782 - Val Accuracy: 0.9586


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 34/81 - Loss: 1.0946 - Accuracy: 0.9761


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.2900 - Val Accuracy: 0.9625


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 35/81 - Loss: 1.1213 - Accuracy: 0.9752


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.2749 - Val Accuracy: 0.9606


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 36/81 - Loss: 1.0905 - Accuracy: 0.9751


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.0926 - Val Accuracy: 0.9652


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 37/81 - Loss: 1.0432 - Accuracy: 0.9774


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.2160 - Val Accuracy: 0.9625


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 38/81 - Loss: 1.0295 - Accuracy: 0.9758


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.2567 - Val Accuracy: 0.9617


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 39/81 - Loss: 1.0483 - Accuracy: 0.9767


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.1410 - Val Accuracy: 0.9625


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 40/81 - Loss: 1.2198 - Accuracy: 0.9718


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.1344 - Val Accuracy: 0.9630


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 41/81 - Loss: 1.1199 - Accuracy: 0.9746


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.1513 - Val Accuracy: 0.9611


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 42/81 - Loss: 1.0054 - Accuracy: 0.9780


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.2435 - Val Accuracy: 0.9603


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 43/81 - Loss: 1.0029 - Accuracy: 0.9770


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.1926 - Val Accuracy: 0.9652


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 44/81 - Loss: 1.0120 - Accuracy: 0.9764


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.2884 - Val Accuracy: 0.9620


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 45/81 - Loss: 1.0467 - Accuracy: 0.9771


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.3142 - Val Accuracy: 0.9608


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 46/81 - Loss: 1.0849 - Accuracy: 0.9760


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.0975 - Val Accuracy: 0.9653


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 47/81 - Loss: 0.9687 - Accuracy: 0.9776


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.1730 - Val Accuracy: 0.9655


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 48/81 - Loss: 1.0115 - Accuracy: 0.9758


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.1842 - Val Accuracy: 0.9625


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 49/81 - Loss: 0.9923 - Accuracy: 0.9773


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.1654 - Val Accuracy: 0.9655


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 50/81 - Loss: 0.9365 - Accuracy: 0.9776


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.1813 - Val Accuracy: 0.9666


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 51/81 - Loss: 0.9255 - Accuracy: 0.9771


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.2723 - Val Accuracy: 0.9617


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 52/81 - Loss: 1.0255 - Accuracy: 0.9761


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.1189 - Val Accuracy: 0.9658


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 53/81 - Loss: 0.9508 - Accuracy: 0.9771


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.3286 - Val Accuracy: 0.9564


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 54/81 - Loss: 0.9197 - Accuracy: 0.9773


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.1966 - Val Accuracy: 0.9658


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 55/81 - Loss: 0.9169 - Accuracy: 0.9781


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.2720 - Val Accuracy: 0.9614


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 56/81 - Loss: 0.9089 - Accuracy: 0.9778


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.1583 - Val Accuracy: 0.9647


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 57/81 - Loss: 0.9650 - Accuracy: 0.9779


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.1894 - Val Accuracy: 0.9669


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 58/81 - Loss: 0.9563 - Accuracy: 0.9794


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.1135 - Val Accuracy: 0.9614


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 59/81 - Loss: 0.8702 - Accuracy: 0.9785


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.1724 - Val Accuracy: 0.9658


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 60/81 - Loss: 0.9309 - Accuracy: 0.9775


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.1693 - Val Accuracy: 0.9652


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 61/81 - Loss: 0.8716 - Accuracy: 0.9790


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.2463 - Val Accuracy: 0.9617


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 62/81 - Loss: 0.8341 - Accuracy: 0.9790


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.3001 - Val Accuracy: 0.9606


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 63/81 - Loss: 1.0431 - Accuracy: 0.9760


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.2782 - Val Accuracy: 0.9608


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 64/81 - Loss: 0.9607 - Accuracy: 0.9762


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.1512 - Val Accuracy: 0.9617


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 65/81 - Loss: 1.0660 - Accuracy: 0.9744


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.2053 - Val Accuracy: 0.9570


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 66/81 - Loss: 0.8805 - Accuracy: 0.9794


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.1698 - Val Accuracy: 0.9641


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 67/81 - Loss: 0.8614 - Accuracy: 0.9770


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.1366 - Val Accuracy: 0.9625


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 68/81 - Loss: 0.8524 - Accuracy: 0.9793


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.0814 - Val Accuracy: 0.9622


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 69/81 - Loss: 0.9605 - Accuracy: 0.9755


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.3480 - Val Accuracy: 0.9603


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 70/81 - Loss: 0.8617 - Accuracy: 0.9788


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.2132 - Val Accuracy: 0.9606


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 71/81 - Loss: 0.8487 - Accuracy: 0.9785


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.2451 - Val Accuracy: 0.9606


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 72/81 - Loss: 0.9253 - Accuracy: 0.9769


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.3405 - Val Accuracy: 0.9575


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 73/81 - Loss: 0.9314 - Accuracy: 0.9775


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.3103 - Val Accuracy: 0.9612


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 74/81 - Loss: 0.8102 - Accuracy: 0.9817


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.1654 - Val Accuracy: 0.9623


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 75/81 - Loss: 0.8243 - Accuracy: 0.9792


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.1026 - Val Accuracy: 0.9647


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 76/81 - Loss: 0.8051 - Accuracy: 0.9778


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.1558 - Val Accuracy: 0.9619


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 77/81 - Loss: 0.8535 - Accuracy: 0.9783


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.1428 - Val Accuracy: 0.9636


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 78/81 - Loss: 0.9486 - Accuracy: 0.9783


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.0953 - Val Accuracy: 0.9653


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 79/81 - Loss: 0.8144 - Accuracy: 0.9801


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.0749 - Val Accuracy: 0.9650


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 80/81 - Loss: 0.7761 - Accuracy: 0.9796


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.1974 - Val Accuracy: 0.9603


Training:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch: 81/81 - Loss: 0.7460 - Accuracy: 0.9807


  0%|          | 0/9 [00:00<?, ?it/s]

[32m[I 2023-12-08 02:57:30,763][0m Trial 7 finished with value: 0.962224543094635 and parameters: {'loss_learning_rate': 0.0011417595824279923, 'learning_rate': 0.0008881431649344765, 'weight_decay': 0.00023876820912612152, 'epsilon': 6.681576717748813e-09, 'batch_size': 201, 'epochs': 81}. Best is trial 3 with value: 0.9623286724090576.[0m


Val Loss: 2.1875 - Val Accuracy: 0.9622
Learning rate for Loss: 0.0001093982632341954
Learning rate: 0.0011988815220724553
Weight decay: 0.00010769416830773792
Epsilon: 1.4773041916124123e-09
Batch size: 176
Number of epochs: 64


Epochs:   0%|          | 0/64 [00:00<?, ?it/s]

Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 1/64 - Loss: 5.3716 - Accuracy: 0.9052


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.6740 - Val Accuracy: 0.9559


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 2/64 - Loss: 2.5467 - Accuracy: 0.9539


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.5119 - Val Accuracy: 0.9610


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 3/64 - Loss: 2.3938 - Accuracy: 0.9563


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.6729 - Val Accuracy: 0.9417


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 4/64 - Loss: 2.0573 - Accuracy: 0.9623


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.3529 - Val Accuracy: 0.9597


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 5/64 - Loss: 1.9099 - Accuracy: 0.9649


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.6400 - Val Accuracy: 0.9558


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 6/64 - Loss: 1.9399 - Accuracy: 0.9615


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.2765 - Val Accuracy: 0.9547


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 7/64 - Loss: 1.7594 - Accuracy: 0.9649


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.2552 - Val Accuracy: 0.9622


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 8/64 - Loss: 1.6542 - Accuracy: 0.9655


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.2806 - Val Accuracy: 0.9628


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 9/64 - Loss: 1.6213 - Accuracy: 0.9668


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.2359 - Val Accuracy: 0.9633


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 10/64 - Loss: 1.5416 - Accuracy: 0.9679


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.4336 - Val Accuracy: 0.9582


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 11/64 - Loss: 1.6066 - Accuracy: 0.9682


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.2784 - Val Accuracy: 0.9593


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 12/64 - Loss: 1.5054 - Accuracy: 0.9679


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.1698 - Val Accuracy: 0.9634


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 13/64 - Loss: 1.4330 - Accuracy: 0.9704


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.2193 - Val Accuracy: 0.9593


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 14/64 - Loss: 1.4104 - Accuracy: 0.9703


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.1239 - Val Accuracy: 0.9622


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 15/64 - Loss: 1.4671 - Accuracy: 0.9687


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.1739 - Val Accuracy: 0.9564


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 16/64 - Loss: 1.4276 - Accuracy: 0.9683


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.5913 - Val Accuracy: 0.9537


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 17/64 - Loss: 1.3414 - Accuracy: 0.9703


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.0813 - Val Accuracy: 0.9593


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 18/64 - Loss: 1.3309 - Accuracy: 0.9712


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.2189 - Val Accuracy: 0.9605


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 19/64 - Loss: 1.3496 - Accuracy: 0.9695


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.2263 - Val Accuracy: 0.9609


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 20/64 - Loss: 1.4227 - Accuracy: 0.9704


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.3073 - Val Accuracy: 0.9633


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 21/64 - Loss: 1.3457 - Accuracy: 0.9713


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.1549 - Val Accuracy: 0.9626


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 22/64 - Loss: 1.4182 - Accuracy: 0.9680


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.2940 - Val Accuracy: 0.9633


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 23/64 - Loss: 1.2179 - Accuracy: 0.9729


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.2048 - Val Accuracy: 0.9564


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 24/64 - Loss: 1.1754 - Accuracy: 0.9733


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.2694 - Val Accuracy: 0.9611


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 25/64 - Loss: 1.2118 - Accuracy: 0.9737


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.1113 - Val Accuracy: 0.9638


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 26/64 - Loss: 1.2244 - Accuracy: 0.9716


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.1958 - Val Accuracy: 0.9620


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 27/64 - Loss: 1.1454 - Accuracy: 0.9743


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.1306 - Val Accuracy: 0.9639


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 28/64 - Loss: 1.1846 - Accuracy: 0.9724


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.1666 - Val Accuracy: 0.9616


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 29/64 - Loss: 1.1847 - Accuracy: 0.9733


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.1903 - Val Accuracy: 0.9615


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 30/64 - Loss: 1.2366 - Accuracy: 0.9719


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.2025 - Val Accuracy: 0.9609


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 31/64 - Loss: 1.1218 - Accuracy: 0.9757


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.1549 - Val Accuracy: 0.9634


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 32/64 - Loss: 1.1469 - Accuracy: 0.9743


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.1260 - Val Accuracy: 0.9622


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 33/64 - Loss: 1.1646 - Accuracy: 0.9722


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.0467 - Val Accuracy: 0.9650


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 34/64 - Loss: 1.0738 - Accuracy: 0.9758


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.3713 - Val Accuracy: 0.9580


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 35/64 - Loss: 1.0531 - Accuracy: 0.9767


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.1424 - Val Accuracy: 0.9640


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 36/64 - Loss: 1.1360 - Accuracy: 0.9736


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.1701 - Val Accuracy: 0.9633


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 37/64 - Loss: 1.0431 - Accuracy: 0.9763


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.1794 - Val Accuracy: 0.9622


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 38/64 - Loss: 1.0538 - Accuracy: 0.9763


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.2036 - Val Accuracy: 0.9570


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 39/64 - Loss: 1.1088 - Accuracy: 0.9750


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.0606 - Val Accuracy: 0.9655


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 40/64 - Loss: 1.0792 - Accuracy: 0.9759


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.1723 - Val Accuracy: 0.9639


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 41/64 - Loss: 1.0581 - Accuracy: 0.9758


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.1001 - Val Accuracy: 0.9599


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 42/64 - Loss: 1.1006 - Accuracy: 0.9728


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.0788 - Val Accuracy: 0.9582


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 43/64 - Loss: 0.9759 - Accuracy: 0.9767


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.1884 - Val Accuracy: 0.9609


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 44/64 - Loss: 0.9601 - Accuracy: 0.9761


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.0699 - Val Accuracy: 0.9667


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 45/64 - Loss: 1.0427 - Accuracy: 0.9745


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.2776 - Val Accuracy: 0.9571


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 46/64 - Loss: 1.0548 - Accuracy: 0.9747


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.2357 - Val Accuracy: 0.9633


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 47/64 - Loss: 0.9701 - Accuracy: 0.9764


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.1399 - Val Accuracy: 0.9600


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 48/64 - Loss: 0.9478 - Accuracy: 0.9777


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.1404 - Val Accuracy: 0.9616


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 49/64 - Loss: 1.0154 - Accuracy: 0.9757


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.2017 - Val Accuracy: 0.9580


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 50/64 - Loss: 0.9758 - Accuracy: 0.9767


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.2570 - Val Accuracy: 0.9610


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 51/64 - Loss: 0.9793 - Accuracy: 0.9761


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.1729 - Val Accuracy: 0.9604


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 52/64 - Loss: 0.9473 - Accuracy: 0.9763


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.1040 - Val Accuracy: 0.9616


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 53/64 - Loss: 1.0002 - Accuracy: 0.9769


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.1335 - Val Accuracy: 0.9604


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 54/64 - Loss: 0.9495 - Accuracy: 0.9766


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.0757 - Val Accuracy: 0.9605


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 55/64 - Loss: 0.8912 - Accuracy: 0.9787


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.1131 - Val Accuracy: 0.9605


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 56/64 - Loss: 0.9067 - Accuracy: 0.9784


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.0963 - Val Accuracy: 0.9623


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 57/64 - Loss: 0.9834 - Accuracy: 0.9751


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.3345 - Val Accuracy: 0.9576


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 58/64 - Loss: 0.9849 - Accuracy: 0.9766


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.4344 - Val Accuracy: 0.9553


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 59/64 - Loss: 0.9338 - Accuracy: 0.9776


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.1527 - Val Accuracy: 0.9610


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 60/64 - Loss: 0.8663 - Accuracy: 0.9774


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.1141 - Val Accuracy: 0.9615


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 61/64 - Loss: 0.8923 - Accuracy: 0.9768


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.1357 - Val Accuracy: 0.9598


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 62/64 - Loss: 0.8390 - Accuracy: 0.9783


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.1100 - Val Accuracy: 0.9650


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 63/64 - Loss: 0.8417 - Accuracy: 0.9794


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.0500 - Val Accuracy: 0.9656


Training:   0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 64/64 - Loss: 0.8778 - Accuracy: 0.9773


  0%|          | 0/10 [00:00<?, ?it/s]

[32m[I 2023-12-08 03:09:46,954][0m Trial 8 finished with value: 0.9579933881759644 and parameters: {'loss_learning_rate': 0.0001093982632341954, 'learning_rate': 0.0011988815220724553, 'weight_decay': 0.00010769416830773792, 'epsilon': 1.4773041916124123e-09, 'batch_size': 176, 'epochs': 64}. Best is trial 3 with value: 0.9623286724090576.[0m


Val Loss: 2.1529 - Val Accuracy: 0.9580
Learning rate for Loss: 0.0016836427763545412
Learning rate: 0.0003216964687818695
Weight decay: 0.003288796642704667
Epsilon: 1.3447060538362165e-09
Batch size: 116
Number of epochs: 93


Epochs:   0%|          | 0/93 [00:00<?, ?it/s]

Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 1/93 - Loss: 5.0872 - Accuracy: 0.9104


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.4329 - Val Accuracy: 0.9574


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 2/93 - Loss: 2.5058 - Accuracy: 0.9560


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.1664 - Val Accuracy: 0.9650


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 3/93 - Loss: 2.2572 - Accuracy: 0.9615


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.0785 - Val Accuracy: 0.9623


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 4/93 - Loss: 2.0700 - Accuracy: 0.9629


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.0903 - Val Accuracy: 0.9661


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 5/93 - Loss: 1.9538 - Accuracy: 0.9639


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.0863 - Val Accuracy: 0.9617


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 6/93 - Loss: 1.8449 - Accuracy: 0.9665


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.1216 - Val Accuracy: 0.9644


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 7/93 - Loss: 1.7368 - Accuracy: 0.9686


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.0051 - Val Accuracy: 0.9655


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 8/93 - Loss: 1.6925 - Accuracy: 0.9684


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 1.9979 - Val Accuracy: 0.9650


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 9/93 - Loss: 1.6528 - Accuracy: 0.9691


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.1073 - Val Accuracy: 0.9617


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 10/93 - Loss: 1.6414 - Accuracy: 0.9696


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 1.9885 - Val Accuracy: 0.9677


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 11/93 - Loss: 1.5386 - Accuracy: 0.9718


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.0450 - Val Accuracy: 0.9655


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 12/93 - Loss: 1.5592 - Accuracy: 0.9707


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.0464 - Val Accuracy: 0.9655


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 13/93 - Loss: 1.5357 - Accuracy: 0.9705


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.0490 - Val Accuracy: 0.9644


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 14/93 - Loss: 1.4592 - Accuracy: 0.9723


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.0354 - Val Accuracy: 0.9661


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 15/93 - Loss: 1.4315 - Accuracy: 0.9730


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.0558 - Val Accuracy: 0.9628


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 16/93 - Loss: 1.3796 - Accuracy: 0.9736


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.1387 - Val Accuracy: 0.9612


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 17/93 - Loss: 1.4059 - Accuracy: 0.9717


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 1.9931 - Val Accuracy: 0.9628


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 18/93 - Loss: 1.4235 - Accuracy: 0.9723


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.0111 - Val Accuracy: 0.9623


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 19/93 - Loss: 1.3055 - Accuracy: 0.9744


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.0376 - Val Accuracy: 0.9634


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 20/93 - Loss: 1.2872 - Accuracy: 0.9742


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.0260 - Val Accuracy: 0.9650


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 21/93 - Loss: 1.2671 - Accuracy: 0.9741


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.0440 - Val Accuracy: 0.9639


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 22/93 - Loss: 1.2513 - Accuracy: 0.9751


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.0611 - Val Accuracy: 0.9661


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 23/93 - Loss: 1.2518 - Accuracy: 0.9745


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.0255 - Val Accuracy: 0.9650


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 24/93 - Loss: 1.2812 - Accuracy: 0.9734


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.0700 - Val Accuracy: 0.9671


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 25/93 - Loss: 1.2735 - Accuracy: 0.9738


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.0079 - Val Accuracy: 0.9639


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 26/93 - Loss: 1.2732 - Accuracy: 0.9738


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.0719 - Val Accuracy: 0.9655


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 27/93 - Loss: 1.2938 - Accuracy: 0.9725


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.0083 - Val Accuracy: 0.9661


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 28/93 - Loss: 1.1370 - Accuracy: 0.9773


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 1.9723 - Val Accuracy: 0.9661


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 29/93 - Loss: 1.1770 - Accuracy: 0.9758


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.0673 - Val Accuracy: 0.9661


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 30/93 - Loss: 1.1332 - Accuracy: 0.9763


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.0203 - Val Accuracy: 0.9628


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 31/93 - Loss: 1.1070 - Accuracy: 0.9762


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.0787 - Val Accuracy: 0.9628


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 32/93 - Loss: 1.1561 - Accuracy: 0.9767


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.1560 - Val Accuracy: 0.9634


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 33/93 - Loss: 1.1083 - Accuracy: 0.9755


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.1265 - Val Accuracy: 0.9650


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 34/93 - Loss: 1.0886 - Accuracy: 0.9758


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.4519 - Val Accuracy: 0.9607


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 35/93 - Loss: 1.1481 - Accuracy: 0.9762


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.1106 - Val Accuracy: 0.9628


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 36/93 - Loss: 1.0605 - Accuracy: 0.9777


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.0861 - Val Accuracy: 0.9655


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 37/93 - Loss: 1.0573 - Accuracy: 0.9788


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.0022 - Val Accuracy: 0.9644


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 38/93 - Loss: 1.1189 - Accuracy: 0.9762


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.0043 - Val Accuracy: 0.9655


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 39/93 - Loss: 1.1105 - Accuracy: 0.9744


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.0997 - Val Accuracy: 0.9661


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 40/93 - Loss: 1.0779 - Accuracy: 0.9769


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.1189 - Val Accuracy: 0.9671


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 41/93 - Loss: 1.0435 - Accuracy: 0.9779


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.2179 - Val Accuracy: 0.9591


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 42/93 - Loss: 1.0454 - Accuracy: 0.9767


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.1441 - Val Accuracy: 0.9666


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 43/93 - Loss: 1.0342 - Accuracy: 0.9765


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 1.9883 - Val Accuracy: 0.9671


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 44/93 - Loss: 1.0574 - Accuracy: 0.9767


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.1785 - Val Accuracy: 0.9623


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 45/93 - Loss: 1.0794 - Accuracy: 0.9758


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.1295 - Val Accuracy: 0.9666


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 46/93 - Loss: 1.0273 - Accuracy: 0.9773


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 1.9942 - Val Accuracy: 0.9671


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 47/93 - Loss: 0.9799 - Accuracy: 0.9784


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.0087 - Val Accuracy: 0.9661


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 48/93 - Loss: 0.9898 - Accuracy: 0.9779


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.0361 - Val Accuracy: 0.9666


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 49/93 - Loss: 1.0542 - Accuracy: 0.9782


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.2874 - Val Accuracy: 0.9612


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 50/93 - Loss: 1.0407 - Accuracy: 0.9765


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.1478 - Val Accuracy: 0.9661


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 51/93 - Loss: 0.9558 - Accuracy: 0.9780


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.0375 - Val Accuracy: 0.9644


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 52/93 - Loss: 0.9378 - Accuracy: 0.9789


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.0934 - Val Accuracy: 0.9650


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 53/93 - Loss: 0.9938 - Accuracy: 0.9778


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.1578 - Val Accuracy: 0.9644


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 54/93 - Loss: 0.9303 - Accuracy: 0.9784


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.1149 - Val Accuracy: 0.9650


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 55/93 - Loss: 0.9176 - Accuracy: 0.9791


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.2561 - Val Accuracy: 0.9661


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 56/93 - Loss: 1.0159 - Accuracy: 0.9779


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.0813 - Val Accuracy: 0.9682


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 57/93 - Loss: 0.9390 - Accuracy: 0.9780


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 1.9869 - Val Accuracy: 0.9725


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 58/93 - Loss: 0.8636 - Accuracy: 0.9806


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.1375 - Val Accuracy: 0.9655


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 59/93 - Loss: 0.8973 - Accuracy: 0.9786


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.1681 - Val Accuracy: 0.9639


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 60/93 - Loss: 0.9240 - Accuracy: 0.9793


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.1177 - Val Accuracy: 0.9650


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 61/93 - Loss: 0.9607 - Accuracy: 0.9788


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.0708 - Val Accuracy: 0.9661


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 62/93 - Loss: 0.8889 - Accuracy: 0.9781


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.1613 - Val Accuracy: 0.9650


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 63/93 - Loss: 0.9161 - Accuracy: 0.9787


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.1162 - Val Accuracy: 0.9655


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 64/93 - Loss: 0.9551 - Accuracy: 0.9783


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.0970 - Val Accuracy: 0.9666


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 65/93 - Loss: 0.8947 - Accuracy: 0.9792


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.1296 - Val Accuracy: 0.9666


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 66/93 - Loss: 0.9483 - Accuracy: 0.9775


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.1052 - Val Accuracy: 0.9661


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 67/93 - Loss: 0.8921 - Accuracy: 0.9774


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.1619 - Val Accuracy: 0.9639


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 68/93 - Loss: 0.8395 - Accuracy: 0.9797


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.0695 - Val Accuracy: 0.9666


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 69/93 - Loss: 0.8410 - Accuracy: 0.9797


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.2158 - Val Accuracy: 0.9634


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 70/93 - Loss: 0.8957 - Accuracy: 0.9792


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.2664 - Val Accuracy: 0.9617


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 71/93 - Loss: 0.8511 - Accuracy: 0.9787


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.1763 - Val Accuracy: 0.9628


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 72/93 - Loss: 0.8305 - Accuracy: 0.9796


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.1000 - Val Accuracy: 0.9666


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 73/93 - Loss: 0.8393 - Accuracy: 0.9793


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.1290 - Val Accuracy: 0.9671


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 74/93 - Loss: 0.9794 - Accuracy: 0.9770


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.1116 - Val Accuracy: 0.9655


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 75/93 - Loss: 0.8499 - Accuracy: 0.9800


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.0724 - Val Accuracy: 0.9655


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 76/93 - Loss: 0.8535 - Accuracy: 0.9807


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.1044 - Val Accuracy: 0.9650


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 77/93 - Loss: 0.8198 - Accuracy: 0.9800


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.1585 - Val Accuracy: 0.9639


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 78/93 - Loss: 0.8175 - Accuracy: 0.9792


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.1733 - Val Accuracy: 0.9671


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 79/93 - Loss: 0.8923 - Accuracy: 0.9785


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.0686 - Val Accuracy: 0.9661


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 80/93 - Loss: 0.7950 - Accuracy: 0.9801


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.0094 - Val Accuracy: 0.9682


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 81/93 - Loss: 0.8045 - Accuracy: 0.9801


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.0980 - Val Accuracy: 0.9596


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 82/93 - Loss: 0.7998 - Accuracy: 0.9804


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.1261 - Val Accuracy: 0.9671


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 83/93 - Loss: 0.8109 - Accuracy: 0.9810


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.0908 - Val Accuracy: 0.9682


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 84/93 - Loss: 0.8212 - Accuracy: 0.9791


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.1167 - Val Accuracy: 0.9661


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 85/93 - Loss: 0.7535 - Accuracy: 0.9809


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.1180 - Val Accuracy: 0.9666


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 86/93 - Loss: 0.7697 - Accuracy: 0.9807


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.1037 - Val Accuracy: 0.9677


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 87/93 - Loss: 0.7893 - Accuracy: 0.9806


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.1224 - Val Accuracy: 0.9671


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 88/93 - Loss: 0.7548 - Accuracy: 0.9814


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.0649 - Val Accuracy: 0.9682


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 89/93 - Loss: 0.7986 - Accuracy: 0.9789


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.5779 - Val Accuracy: 0.9628


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 90/93 - Loss: 0.8030 - Accuracy: 0.9802


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.0940 - Val Accuracy: 0.9650


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 91/93 - Loss: 0.7371 - Accuracy: 0.9799


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.0814 - Val Accuracy: 0.9666


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 92/93 - Loss: 0.7671 - Accuracy: 0.9797


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.0962 - Val Accuracy: 0.9650


Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 93/93 - Loss: 0.7445 - Accuracy: 0.9805


  0%|          | 0/16 [00:00<?, ?it/s]

Val Loss: 2.1561 - Val Accuracy: 0.9682
Saving best model...


[32m[I 2023-12-08 03:27:21,789][0m Trial 9 finished with value: 0.9682112336158752 and parameters: {'loss_learning_rate': 0.0016836427763545412, 'learning_rate': 0.0003216964687818695, 'weight_decay': 0.003288796642704667, 'epsilon': 1.3447060538362165e-09, 'batch_size': 116, 'epochs': 93}. Best is trial 9 with value: 0.9682112336158752.[0m


Learning rate for Loss: 0.004943856521415986
Learning rate: 0.008399951749754777
Weight decay: 0.008068791483229679
Epsilon: 3.119681308965902e-09
Batch size: 57
Number of epochs: 77


Epochs:   0%|          | 0/77 [00:00<?, ?it/s]

Training:   0%|          | 0/246 [00:00<?, ?it/s]

Epoch: 1/77 - Loss: 6.0495 - Accuracy: 0.9001


  0%|          | 0/31 [00:00<?, ?it/s]

[32m[I 2023-12-08 03:27:31,394][0m Trial 10 pruned. [0m


Val Loss: 2.9457 - Val Accuracy: 0.9121
Learning rate for Loss: 0.00034137241070796064
Learning rate: 0.000561178545163721
Weight decay: 0.0014920207264559977
Epsilon: 2.0473226960589153e-09
Batch size: 123
Number of epochs: 12


Epochs:   0%|          | 0/12 [00:00<?, ?it/s]

Training:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch: 1/12 - Loss: 4.8950 - Accuracy: 0.9176


  0%|          | 0/15 [00:00<?, ?it/s]

Val Loss: 2.5823 - Val Accuracy: 0.9512


Training:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch: 2/12 - Loss: 2.4183 - Accuracy: 0.9580


  0%|          | 0/15 [00:00<?, ?it/s]

Val Loss: 2.4151 - Val Accuracy: 0.9608


Training:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch: 3/12 - Loss: 2.1969 - Accuracy: 0.9604


  0%|          | 0/15 [00:00<?, ?it/s]

Val Loss: 2.4454 - Val Accuracy: 0.9601


Training:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch: 4/12 - Loss: 2.0625 - Accuracy: 0.9622


  0%|          | 0/15 [00:00<?, ?it/s]

Val Loss: 2.8424 - Val Accuracy: 0.9440


Training:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch: 5/12 - Loss: 2.0149 - Accuracy: 0.9605


  0%|          | 0/15 [00:00<?, ?it/s]

Val Loss: 2.3364 - Val Accuracy: 0.9530


Training:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch: 6/12 - Loss: 1.7957 - Accuracy: 0.9661


  0%|          | 0/15 [00:00<?, ?it/s]

Val Loss: 2.2673 - Val Accuracy: 0.9590


Training:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch: 7/12 - Loss: 1.7067 - Accuracy: 0.9682


  0%|          | 0/15 [00:00<?, ?it/s]

Val Loss: 2.4304 - Val Accuracy: 0.9581


Training:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch: 8/12 - Loss: 1.6760 - Accuracy: 0.9663


  0%|          | 0/15 [00:00<?, ?it/s]

[32m[I 2023-12-08 03:29:04,095][0m Trial 11 pruned. [0m


Val Loss: 2.3695 - Val Accuracy: 0.9584
Learning rate for Loss: 0.002710071390716073
Learning rate: 0.00011545222600165301
Weight decay: 0.005123415081237744
Epsilon: 3.156271539893661e-09
Batch size: 212
Number of epochs: 15


Epochs:   0%|          | 0/15 [00:00<?, ?it/s]

Training:   0%|          | 0/66 [00:00<?, ?it/s]

Epoch: 1/15 - Loss: 10.4354 - Accuracy: 0.7960


  0%|          | 0/9 [00:00<?, ?it/s]

[32m[I 2023-12-08 03:29:17,557][0m Trial 12 pruned. [0m


Val Loss: 4.0584 - Val Accuracy: 0.9438
Learning rate for Loss: 0.0004459103343647537
Learning rate: 0.0042085082999872665
Weight decay: 0.0009693560709903338
Epsilon: 1.0113897128161214e-09
Batch size: 119
Number of epochs: 26


Epochs:   0%|          | 0/26 [00:00<?, ?it/s]

Training:   0%|          | 0/118 [00:00<?, ?it/s]

Epoch: 1/26 - Loss: 5.0410 - Accuracy: 0.9172


  0%|          | 0/15 [00:00<?, ?it/s]

Val Loss: 2.5878 - Val Accuracy: 0.9568


Training:   0%|          | 0/118 [00:00<?, ?it/s]

Epoch: 2/26 - Loss: 2.5654 - Accuracy: 0.9467


  0%|          | 0/15 [00:00<?, ?it/s]

Val Loss: 2.7246 - Val Accuracy: 0.9487


Training:   0%|          | 0/118 [00:00<?, ?it/s]

Epoch: 3/26 - Loss: 2.3913 - Accuracy: 0.9489


  0%|          | 0/15 [00:00<?, ?it/s]

Val Loss: 2.3494 - Val Accuracy: 0.9540


Training:   0%|          | 0/118 [00:00<?, ?it/s]

Epoch: 4/26 - Loss: 2.1827 - Accuracy: 0.9503


  0%|          | 0/15 [00:00<?, ?it/s]

[32m[I 2023-12-08 03:30:01,323][0m Trial 13 pruned. [0m


Val Loss: 2.5319 - Val Accuracy: 0.9577
Learning rate for Loss: 0.0003220537303727741
Learning rate: 0.0002821575813520366
Weight decay: 0.0011410693241135988
Epsilon: 2.7479820165061233e-09
Batch size: 157
Number of epochs: 80


Epochs:   0%|          | 0/80 [00:00<?, ?it/s]

Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 1/80 - Loss: 6.0418 - Accuracy: 0.8962


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 2.7073 - Val Accuracy: 0.9559


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 2/80 - Loss: 2.7486 - Accuracy: 0.9540


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 2.5717 - Val Accuracy: 0.9529


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 3/80 - Loss: 2.3490 - Accuracy: 0.9606


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 2.3446 - Val Accuracy: 0.9577


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 4/80 - Loss: 2.1666 - Accuracy: 0.9627


  0%|          | 0/12 [00:00<?, ?it/s]

[32m[I 2023-12-08 03:30:46,934][0m Trial 14 pruned. [0m


Val Loss: 2.5437 - Val Accuracy: 0.9518
Learning rate for Loss: 0.002345942554348848
Learning rate: 0.0026477265647260315
Weight decay: 0.0034653977970744644
Epsilon: 5.2122180930523775e-09
Batch size: 228
Number of epochs: 24


Epochs:   0%|          | 0/24 [00:00<?, ?it/s]

Training:   0%|          | 0/62 [00:00<?, ?it/s]

Epoch: 1/24 - Loss: 7.0350 - Accuracy: 0.8401


  0%|          | 0/8 [00:00<?, ?it/s]

[32m[I 2023-12-08 03:30:59,525][0m Trial 15 pruned. [0m


Val Loss: 2.7576 - Val Accuracy: 0.9445
Learning rate for Loss: 0.0002204172522697178
Learning rate: 1.2522719075098866e-05
Weight decay: 0.009798844375346172
Epsilon: 1.2579156214331911e-08
Batch size: 96
Number of epochs: 68


Epochs:   0%|          | 0/68 [00:00<?, ?it/s]

Training:   0%|          | 0/146 [00:00<?, ?it/s]

Epoch: 1/68 - Loss: 19.2629 - Accuracy: 0.6008


  0%|          | 0/19 [00:00<?, ?it/s]

[32m[I 2023-12-08 03:31:12,087][0m Trial 16 pruned. [0m


Val Loss: 15.3656 - Val Accuracy: 0.7197
Learning rate for Loss: 0.0005731551181610502
Learning rate: 5.5398093359728674e-05
Weight decay: 0.001853007833324295
Epsilon: 7.937463384005181e-09
Batch size: 146
Number of epochs: 30


Epochs:   0%|          | 0/30 [00:00<?, ?it/s]

Training:   0%|          | 0/96 [00:00<?, ?it/s]

Epoch: 1/30 - Loss: 12.3290 - Accuracy: 0.7772


  0%|          | 0/12 [00:00<?, ?it/s]

[32m[I 2023-12-08 03:31:23,441][0m Trial 17 pruned. [0m


Val Loss: 5.3311 - Val Accuracy: 0.9358
Learning rate for Loss: 0.0013714783849564363
Learning rate: 0.00040200013758646356
Weight decay: 0.0007664988765531446
Epsilon: 1.6990237645890134e-09
Batch size: 298
Number of epochs: 50


Epochs:   0%|          | 0/50 [00:00<?, ?it/s]

Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 1/50 - Loss: 6.6059 - Accuracy: 0.8772


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.7851 - Val Accuracy: 0.9567


Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 2/50 - Loss: 2.6880 - Accuracy: 0.9542


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.5475 - Val Accuracy: 0.9583


Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 3/50 - Loss: 2.3239 - Accuracy: 0.9617


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.2827 - Val Accuracy: 0.9642


Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 4/50 - Loss: 2.0902 - Accuracy: 0.9635


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.3487 - Val Accuracy: 0.9589


Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 5/50 - Loss: 1.9760 - Accuracy: 0.9661


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.1141 - Val Accuracy: 0.9675


Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 6/50 - Loss: 1.8404 - Accuracy: 0.9685


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.1102 - Val Accuracy: 0.9634


Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 7/50 - Loss: 1.9347 - Accuracy: 0.9659


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.2026 - Val Accuracy: 0.9657


Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 8/50 - Loss: 1.7211 - Accuracy: 0.9699


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.1563 - Val Accuracy: 0.9634


Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 9/50 - Loss: 1.7055 - Accuracy: 0.9692


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.1955 - Val Accuracy: 0.9612


Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 10/50 - Loss: 1.6537 - Accuracy: 0.9696


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.1951 - Val Accuracy: 0.9635


Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 11/50 - Loss: 1.6819 - Accuracy: 0.9687


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.1666 - Val Accuracy: 0.9630


Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 12/50 - Loss: 1.5332 - Accuracy: 0.9719


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.2029 - Val Accuracy: 0.9635


Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 13/50 - Loss: 1.5061 - Accuracy: 0.9728


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.1757 - Val Accuracy: 0.9606


Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 14/50 - Loss: 1.4683 - Accuracy: 0.9713


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.2153 - Val Accuracy: 0.9618


Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 15/50 - Loss: 1.4244 - Accuracy: 0.9740


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.1577 - Val Accuracy: 0.9619


Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 16/50 - Loss: 1.3885 - Accuracy: 0.9737


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.2282 - Val Accuracy: 0.9623


Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 17/50 - Loss: 1.3724 - Accuracy: 0.9745


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.1841 - Val Accuracy: 0.9601


Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 18/50 - Loss: 1.3316 - Accuracy: 0.9742


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.1673 - Val Accuracy: 0.9601


Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 19/50 - Loss: 1.3626 - Accuracy: 0.9749


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.1636 - Val Accuracy: 0.9641


Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 20/50 - Loss: 1.2947 - Accuracy: 0.9747


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.1682 - Val Accuracy: 0.9641


Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 21/50 - Loss: 1.3413 - Accuracy: 0.9732


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.1538 - Val Accuracy: 0.9639


Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 22/50 - Loss: 1.3146 - Accuracy: 0.9747


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.2787 - Val Accuracy: 0.9619


Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 23/50 - Loss: 1.3283 - Accuracy: 0.9731


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.4136 - Val Accuracy: 0.9641


Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 24/50 - Loss: 1.2491 - Accuracy: 0.9752


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.1636 - Val Accuracy: 0.9624


Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 25/50 - Loss: 1.2320 - Accuracy: 0.9753


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.4055 - Val Accuracy: 0.9600


Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 26/50 - Loss: 1.3506 - Accuracy: 0.9725


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.2637 - Val Accuracy: 0.9628


Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 27/50 - Loss: 1.2031 - Accuracy: 0.9764


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.2067 - Val Accuracy: 0.9594


Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 28/50 - Loss: 1.1864 - Accuracy: 0.9765


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.1940 - Val Accuracy: 0.9611


Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 29/50 - Loss: 1.1443 - Accuracy: 0.9772


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.2503 - Val Accuracy: 0.9618


Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 30/50 - Loss: 1.1408 - Accuracy: 0.9764


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.1457 - Val Accuracy: 0.9622


Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 31/50 - Loss: 1.1871 - Accuracy: 0.9767


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.2768 - Val Accuracy: 0.9607


Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 32/50 - Loss: 1.1497 - Accuracy: 0.9779


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.2102 - Val Accuracy: 0.9623


Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 33/50 - Loss: 1.0928 - Accuracy: 0.9777


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.2130 - Val Accuracy: 0.9635


Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 34/50 - Loss: 1.1793 - Accuracy: 0.9761


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.2405 - Val Accuracy: 0.9647


Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 35/50 - Loss: 1.0878 - Accuracy: 0.9777


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.3094 - Val Accuracy: 0.9627


Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 36/50 - Loss: 1.0921 - Accuracy: 0.9769


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.3811 - Val Accuracy: 0.9572


Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 37/50 - Loss: 1.1293 - Accuracy: 0.9757


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.2991 - Val Accuracy: 0.9610


Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 38/50 - Loss: 1.1123 - Accuracy: 0.9773


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.2240 - Val Accuracy: 0.9639


Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 39/50 - Loss: 1.0978 - Accuracy: 0.9766


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.1984 - Val Accuracy: 0.9646


Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 40/50 - Loss: 1.0422 - Accuracy: 0.9784


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.2224 - Val Accuracy: 0.9617


Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 41/50 - Loss: 1.0219 - Accuracy: 0.9785


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.1888 - Val Accuracy: 0.9611


Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 42/50 - Loss: 1.0239 - Accuracy: 0.9769


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.2870 - Val Accuracy: 0.9623


Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 43/50 - Loss: 1.0184 - Accuracy: 0.9780


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.2382 - Val Accuracy: 0.9630


Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 44/50 - Loss: 0.9874 - Accuracy: 0.9781


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.1854 - Val Accuracy: 0.9635


Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 45/50 - Loss: 0.9666 - Accuracy: 0.9795


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.3500 - Val Accuracy: 0.9613


Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 46/50 - Loss: 1.0965 - Accuracy: 0.9773


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.1965 - Val Accuracy: 0.9663


Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 47/50 - Loss: 1.0413 - Accuracy: 0.9772


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.3602 - Val Accuracy: 0.9590


Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 48/50 - Loss: 1.0481 - Accuracy: 0.9772


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.2553 - Val Accuracy: 0.9642


Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 49/50 - Loss: 0.9430 - Accuracy: 0.9797


  0%|          | 0/6 [00:00<?, ?it/s]

Val Loss: 2.3254 - Val Accuracy: 0.9601


Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 50/50 - Loss: 0.9682 - Accuracy: 0.9782


  0%|          | 0/6 [00:00<?, ?it/s]

[32m[I 2023-12-08 03:40:51,562][0m Trial 18 finished with value: 0.9629862904548645 and parameters: {'loss_learning_rate': 0.0013714783849564363, 'learning_rate': 0.00040200013758646356, 'weight_decay': 0.0007664988765531446, 'epsilon': 1.6990237645890134e-09, 'batch_size': 298, 'epochs': 50}. Best is trial 9 with value: 0.9682112336158752.[0m


Val Loss: 2.1592 - Val Accuracy: 0.9630
Learning rate for Loss: 0.001277025779749722
Learning rate: 0.01448714693104777
Weight decay: 0.0007134429071709082
Epsilon: 1.44401038497184e-09
Batch size: 298
Number of epochs: 89


Epochs:   0%|          | 0/89 [00:00<?, ?it/s]

Training:   0%|          | 0/47 [00:00<?, ?it/s]

Epoch: 1/89 - Loss: 8.4961 - Accuracy: 0.8723


  0%|          | 0/6 [00:00<?, ?it/s]

[32m[I 2023-12-08 03:41:00,083][0m Trial 19 pruned. [0m


Val Loss: 3.1209 - Val Accuracy: 0.9545

Study statistics: 
  Number of finished trials:  20
  Number of pruned trials:  11
  Number of complete trials:  9


In [1]:
print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

Best trial:


NameError: name 'study' is not defined

In [None]:
# ViT P12-S8 CosFace Mean

Best trial:
Value:  0.9682112336158752
Params:
batch_size: 116
epochs: 93
epsilon: 1.3447060538362165e-09
learning_rate: 0.0003216964687818695
loss_learning_rate: 0.0016836427763545412
weight_decay: 0.003288796642704667