In [20]:
import os
import random
import pandas as pd
import numpy as np
import mxnet as mx
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as opt
from torch.utils.data import Dataset, DataLoader
from pytorch_metric_learning import losses
from einops import rearrange, repeat
import optuna
from optuna.trial import TrialState
from tqdm.notebook import tqdm

In [2]:
def file_to_embed(embeds, file):
    emb = []
    for f in file:
        emb.append(embeds[f][0])
    return torch.stack(emb)

In [3]:
MIN_NUM_PATCHES = 16

In [4]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

In [5]:
class AdienceDataset(Dataset):
    def __init__(self, annot_file, img_dir):
        self.img_lbls = pd.read_csv(annot_file, header=None)
        self.img_dir = img_dir
    
    def __len__(self):
        return len(self.img_lbls)
    
    def __getitem__(self, idx):
        img_file = self.img_lbls.iloc[idx, 0]
        img_path = os.path.join(self.img_dir, img_file)
        image = mx.image.imread(img_path)
        if image.shape[1] != 112:
            image = mx.image.resize_short(image, 112)
        image = mx.nd.transpose(image, axes=(2,0,1))
        image = torch.tensor(image.asnumpy()).type(torch.FloatTensor)
        label = self.img_lbls.iloc[idx, 1]

        return image, label, img_file

In [6]:
train_data = AdienceDataset("../train.csv", "../cropped_Adience/")
val_data = AdienceDataset("../val.csv", "../cropped_Adience/")

In [7]:
class CosFace(nn.Module):
    r"""Implement of CosFace (https://arxiv.org/pdf/1801.09414.pdf):
    Args:
        in_features: size of each input sample
        out_features: size of each output sample
        device_id: the ID of GPU where the model will be trained by model parallel.
                       if device_id=None, it will be trained on CPU without model parallel.
        s: norm of input feature
        m: margin
        cos(theta)-m
    """

    def __init__(self, in_features, out_features, device_id, s=64.0, m=0.35):
        super(CosFace, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.device_id = device_id
        self.s = s
        self.m = m
        print("self.device_id", self.device_id)
        self.weight = nn.Parameter(torch.FloatTensor(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)

    def forward(self, input, label):
        # --------------------------- cos(theta) & phi(theta) ---------------------------

        if self.device_id == None:
            cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        else:
            x = input
            sub_weights = torch.chunk(self.weight, len(self.device_id), dim=0)
            temp_x = x.cuda(self.device_id[0])
            weight = sub_weights[0].cuda(self.device_id[0])
            cosine = F.linear(F.normalize(temp_x), F.normalize(weight))
            for i in range(1, len(self.device_id)):
                temp_x = x.cuda(self.device_id[i])
                weight = sub_weights[i].cuda(self.device_id[i])
                cosine = torch.cat((cosine, F.linear(F.normalize(temp_x), F.normalize(weight)).cuda(self.device_id[0])),
                                   dim=1)
        phi = cosine - self.m
        # --------------------------- convert label to one-hot ---------------------------
        one_hot = torch.zeros(cosine.size())
        if self.device_id != None:
            one_hot = one_hot.cuda(self.device_id[0])
        # one_hot = one_hot.cuda() if cosine.is_cuda else one_hot

        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        # -------------torch.where(out_i = {x_i if condition_i else y_i) -------------
        output = (one_hot * phi) + (
                    (1.0 - one_hot) * cosine)  # you can use torch.where if your torch.__version__ is 0.4
        output *= self.s

        return output

    def __repr__(self):
        return self.__class__.__name__ + '(' \
               + 'in_features = ' + str(self.in_features) \
               + ', out_features = ' + str(self.out_features) \
               + ', s = ' + str(self.s) \
               + ', m = ' + str(self.m) + ')'

In [8]:
class Residual(nn.Module):
    def __init__(self, fn):
        super().__init__()
        self.fn = fn
    def forward(self, x, **kwargs):
        return self.fn(x, **kwargs) + x

In [9]:
class PreNorm(nn.Module):
    def __init__(self, dim, fn):
        super().__init__()
        self.norm = nn.LayerNorm(dim)
        self.fn = fn
    def forward(self, x, **kwargs):
        return self.fn(self.norm(x), **kwargs)

In [10]:
class FeedForward(nn.Module):
    def __init__(self, dim, hidden_dim, dropout = 0.):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(dim, hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, dim),
            nn.Dropout(dropout)
        )
    def forward(self, x):
        return self.net(x)

In [11]:
class Attention(nn.Module):
    def __init__(self, dim, heads = 8, dim_head = 64, dropout = 0.):
        super().__init__()
        inner_dim = dim_head *  heads
        self.heads = heads
        self.scale = dim ** -0.5

        self.to_qkv = nn.Linear(dim, inner_dim * 3, bias = False)
        self.to_out = nn.Sequential(
            nn.Linear(inner_dim, dim),
            nn.Dropout(dropout)
        )

    def forward(self, x, mask = None):
        b, n, _, h = *x.shape, self.heads
        qkv = self.to_qkv(x).chunk(3, dim = -1)

        q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> b h n d', h = h), qkv)
        dots = torch.einsum('bhid,bhjd->bhij', q, k) * self.scale
        mask_value = -torch.finfo(dots.dtype).max
        #embed()
        if mask is not None:
            mask = F.pad(mask.flatten(1), (1, 0), value = True)
            assert mask.shape[-1] == dots.shape[-1], 'mask has incorrect dimensions'
            mask = mask[:, None, :] * mask[:, :, None]
            dots.masked_fill_(~mask, mask_value)
            del mask

        attn = dots.softmax(dim=-1)

        out = torch.einsum('bhij,bhjd->bhid', attn, v)
        out = rearrange(out, 'b h n d -> b n (h d)')
        out =  self.to_out(out)

        return out

In [12]:
class Transformer(nn.Module):
    def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout):
        super().__init__()
        self.layers = nn.ModuleList([])
        for _ in range(depth):
            self.layers.append(nn.ModuleList([
                Residual(PreNorm(dim, Attention(dim, heads = heads, dim_head = dim_head, dropout = dropout))),
                Residual(PreNorm(dim, FeedForward(dim, mlp_dim, dropout = dropout)))
            ]))
    def forward(self, x, mask = None):
        for attn, ff in self.layers:
            x = attn(x, mask = mask)
            #embed()
            x = ff(x)
        return x

In [13]:
class ViTs_face(nn.Module):
    def __init__(self, *, loss_type, GPU_ID, num_class, image_size, patch_size, ac_patch_size,
                         pad, dim, depth, heads, mlp_dim, pool = 'mean', channels = 3, dim_head = 64, dropout = 0., emb_dropout = 0.):
        super().__init__()
        assert image_size % patch_size == 0, 'Image dimensions must be divisible by the patch size.'
        num_patches = (image_size // patch_size) ** 2
        patch_dim = channels * ac_patch_size ** 2
        assert num_patches > MIN_NUM_PATCHES, f'your number of patches ({num_patches}) is way too small for attention to be effective (at least 16). Try decreasing your patch size'
        assert pool in {'cls', 'mean'}, 'pool type must be either cls (cls token) or mean (mean pooling)'

        self.patch_size = patch_size
        self.soft_split = nn.Unfold(kernel_size=(ac_patch_size, ac_patch_size), stride=(self.patch_size, self.patch_size), padding=(pad, pad))


        self.pos_embedding = nn.Parameter(torch.randn(1, num_patches + 1, dim))
        self.patch_to_embedding = nn.Linear(patch_dim, dim)
        self.cls_token = nn.Parameter(torch.randn(1, 1, dim))
        self.dropout = nn.Dropout(emb_dropout)

        self.transformer = Transformer(dim, depth, heads, dim_head, mlp_dim, dropout)

        self.pool = pool
        self.to_latent = nn.Identity()

        self.mlp_head = nn.Sequential(
            nn.LayerNorm(dim),
        )
        self.loss_type = loss_type
        self.GPU_ID = GPU_ID
        if self.loss_type == 'None':
            print("no loss for vit_face")
        else:
            if self.loss_type == 'Softmax':
                self.loss = Softmax(in_features=dim, out_features=num_class, device_id=self.GPU_ID)
            elif self.loss_type == 'CosFace':
                self.loss = CosFace(in_features=dim, out_features=num_class, device_id=self.GPU_ID)
            elif self.loss_type == 'ArcFace':
                self.loss = ArcFace(in_features=dim, out_features=num_class, device_id=self.GPU_ID)
            elif self.loss_type == 'SFace':
                self.loss = SFaceLoss(in_features=dim, out_features=num_class, device_id=self.GPU_ID)

    def forward(self, img, label= None , mask = None):
        p = self.patch_size
        x = self.soft_split(img).transpose(1, 2)
        x = self.patch_to_embedding(x)
        b, n, _ = x.shape

        cls_tokens = repeat(self.cls_token, '() n d -> b n d', b = b)
        x = torch.cat((cls_tokens, x), dim=1)
        x += self.pos_embedding[:, :(n + 1)]
        x = self.dropout(x)
        x = self.transformer(x, mask)

        y = x[:, 0]
        z = x[:, 1:].mean(dim = 1)

        y = self.to_latent(y)
        emb_y = self.mlp_head(y)
        z = self.to_latent(z)
        emb_z = self.mlp_head(z)
        emb = torch.cat((emb_y, emb_z), dim=1)
        if label is not None:
            x = self.loss(emb, label)
            return x, emb
        else:
            return emb

In [14]:
class ViT_plus(nn.Module):
    def __init__(self):
        super(ViT_plus, self).__init__()
        
        self.fc1 = nn.Linear(in_features=1024, out_features=1024)
        self.fc2 = nn.Linear(in_features=1024, out_features=2)
        
    def forward(self, x):
        x = self.fc1(x)
        x_cosface = x
        x_classification = self.fc2(x)
        
        return x_cosface, x_classification

In [15]:
model = ViTs_face(
            loss_type='CosFace',
            GPU_ID=[device],
            num_class=93431,
            image_size=112,
            patch_size=8,
            ac_patch_size=12,
            pad=4,
            dim=512,
            depth=20,
            heads=8,
            mlp_dim=2048,
            dropout=0.1,
            emb_dropout=0.1
        ).to(device)
model.load_state_dict(
    torch.load("../Face-Transformer/results/ViT-P12S8_ms1m_cosface/Backbone_VITs_Epoch_2_Batch_12000_Time_2021-03-17-04-05_checkpoint.pth", map_location=device)
)

self.device_id [device(type='cuda', index=0)]


<All keys matched successfully>

In [16]:
for param in model.parameters():
    param.requires_grad = False

In [17]:
embeds = {}
model.eval()

with torch.no_grad():
    for img, _, file in train_data:
        img = img.to(device)
        embeds[file] = model(torch.unsqueeze(img, 0))

    for img, _, file in val_data:
        img = img.to(device)
        embeds[file] = model(torch.unsqueeze(img, 0))

In [23]:
best_accu = 0.9650763273239136
def objective(trial):
    model_xtr = ViT_plus().to(device)
    
    loss_lr = trial.suggest_float("loss_learning_rate", 1e-4, 1e-2, log=True)
    arc_margin = losses.ArcFaceLoss(2, 1024).to(device)
    loss_optimizer = opt.AdamW(arc_margin.parameters(), lr=loss_lr)
    
    lr = trial.suggest_float("learning_rate", 1e-5, 1e-1, log=True)
    wd = trial.suggest_float('weight_decay', 1e-4, 1e-2, log=True)
    eps = trial.suggest_float("epsilon", 1e-9, 1e-7, log=True)
    optimizer = opt.AdamW(model_xtr.parameters(), lr=lr, eps=eps, weight_decay=wd)
    
    criterion = nn.CrossEntropyLoss()
    
    batch_size = trial.suggest_int('batch_size', 50, 300)
    num_epochs = trial.suggest_int('epochs', 10, 100)
    
    print("Learning rate for Loss: "+ str(loss_lr))
    print("Learning rate: "+ str(lr))
    print("Weight decay: "+ str(wd))
    print("Epsilon: "+ str(eps))
    print("Batch size: "+ str(batch_size))
    print("Number of epochs: "+ str(num_epochs))
    
    for epoch in tqdm(range(num_epochs), desc="Epochs"):
        train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=4)
        val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False, num_workers=4)
        
        # training loop
        running_loss = []
        running_accu = []
        
        model_xtr.train()
        for img, label, file in tqdm(train_loader, desc="Training", leave=False):
            img, label = img.to(device), label.to(device)

            x = file_to_embed(embeds, file)
            
            optimizer.zero_grad()
            embed, output = model_xtr(x)
            
            pred = torch.argmax(output, 1)
            accuracy = torch.eq(pred, label).sum() / len(img)

            class_loss = criterion(output, label)
            arc_loss = arc_margin(embed, label)
            loss = class_loss + arc_loss
            loss.backward()
            loss_optimizer.step()
            optimizer.step()

            running_accu.append(accuracy.cpu().detach().numpy())
            running_loss.append(loss.cpu().detach().numpy())
        print("Epoch: {}/{} - Loss: {:.4f} - Accuracy: {:.4f}".format(epoch+1, num_epochs, np.mean(running_loss), np.mean(running_accu)))
        
        # validation loop
        val_loss = []
        val_accu = []

        model_xtr.eval()
        with torch.no_grad():
            for img, label, file in tqdm(val_loader):
                img, label = img.to(device), label.to(device)
                
                x = file_to_embed(embeds, file)
                
                embed, output = model_xtr(x)
                
                pred = torch.argmax(output, 1)
                accuracy = torch.eq(pred, label).sum() / len(img)
                
                class_loss = criterion(output, label)
                arc_loss = arc_margin(embed, label)
                loss = class_loss + arc_loss
                
                val_accu.append(accuracy.cpu().detach().numpy())
                val_loss.append(loss.cpu().detach().numpy())
        val_accu = np.mean(val_accu)
        val_loss = np.mean(val_loss)
        print("Val Loss: {:.4f} - Val Accuracy: {:.4f}".format(val_loss, val_accu))
        
        trial.report(val_accu, epoch)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()
    
    global best_accu
    if val_accu > best_accu:
        best_accu = val_accu
        print("Saving best model...")
        torch.save(model_xtr.state_dict(), "../vit_12-8_arcface_mean.pt")
            
    return val_accu

In [24]:
study = optuna.create_study(direction='maximize',
                            study_name='arcface-12-8-mean-vit-study',
                            storage='sqlite:///study1.db',
                            load_if_exists=True)
study.optimize(objective, n_trials=10)

pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

# Display the study statistics
print("\nStudy statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

[32m[I 2023-12-08 01:20:42,857][0m Using an existing study with name 'arcface-12-8-mean-vit-study' instead of creating a new one.[0m


Learning rate for Loss: 0.00010541490185043079
Learning rate: 0.0015422779855132805
Weight decay: 0.007201232402516741
Epsilon: 2.3282310447997555e-08
Batch size: 148
Number of epochs: 97


Epochs:   0%|          | 0/97 [00:00<?, ?it/s]

Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 1/97 - Loss: 4.2598 - Accuracy: 0.8995


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 2.0412 - Val Accuracy: 0.9525


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 2/97 - Loss: 1.9264 - Accuracy: 0.9559


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.8341 - Val Accuracy: 0.9576


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 3/97 - Loss: 1.8388 - Accuracy: 0.9529


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 2.5684 - Val Accuracy: 0.8864


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 4/97 - Loss: 1.6195 - Accuracy: 0.9549


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.8128 - Val Accuracy: 0.9611


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 5/97 - Loss: 1.4572 - Accuracy: 0.9647


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.5843 - Val Accuracy: 0.9610


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 6/97 - Loss: 1.3702 - Accuracy: 0.9659


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.6050 - Val Accuracy: 0.9622


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 7/97 - Loss: 1.3487 - Accuracy: 0.9647


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.5704 - Val Accuracy: 0.9619


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 8/97 - Loss: 1.2297 - Accuracy: 0.9691


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.5711 - Val Accuracy: 0.9615


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 9/97 - Loss: 1.2287 - Accuracy: 0.9685


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.5716 - Val Accuracy: 0.9591


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 10/97 - Loss: 1.1970 - Accuracy: 0.9673


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.5371 - Val Accuracy: 0.9642


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 11/97 - Loss: 1.2031 - Accuracy: 0.9627


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.7676 - Val Accuracy: 0.9535


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 12/97 - Loss: 1.1690 - Accuracy: 0.9665


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.8109 - Val Accuracy: 0.9545


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 13/97 - Loss: 1.1209 - Accuracy: 0.9653


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.5973 - Val Accuracy: 0.9611


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 14/97 - Loss: 1.0654 - Accuracy: 0.9676


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.4555 - Val Accuracy: 0.9627


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 15/97 - Loss: 1.0526 - Accuracy: 0.9690


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.4555 - Val Accuracy: 0.9613


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 16/97 - Loss: 0.9805 - Accuracy: 0.9722


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.5634 - Val Accuracy: 0.9606


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 17/97 - Loss: 0.9957 - Accuracy: 0.9694


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.3888 - Val Accuracy: 0.9594


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 18/97 - Loss: 0.9413 - Accuracy: 0.9746


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.3658 - Val Accuracy: 0.9638


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 19/97 - Loss: 0.9461 - Accuracy: 0.9733


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.5075 - Val Accuracy: 0.9610


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 20/97 - Loss: 0.9417 - Accuracy: 0.9730


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.5886 - Val Accuracy: 0.9568


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 21/97 - Loss: 0.9515 - Accuracy: 0.9717


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.3724 - Val Accuracy: 0.9656


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 22/97 - Loss: 0.8933 - Accuracy: 0.9729


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.3215 - Val Accuracy: 0.9480


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 23/97 - Loss: 0.8869 - Accuracy: 0.9737


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.3484 - Val Accuracy: 0.9613


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 24/97 - Loss: 0.8701 - Accuracy: 0.9755


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.3834 - Val Accuracy: 0.9501


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 25/97 - Loss: 0.8839 - Accuracy: 0.9734


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.3734 - Val Accuracy: 0.9628


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 26/97 - Loss: 0.8994 - Accuracy: 0.9720


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.4945 - Val Accuracy: 0.9630


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 27/97 - Loss: 0.8927 - Accuracy: 0.9708


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.3402 - Val Accuracy: 0.9624


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 28/97 - Loss: 0.8540 - Accuracy: 0.9721


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.2899 - Val Accuracy: 0.9627


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 29/97 - Loss: 0.8104 - Accuracy: 0.9747


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.4253 - Val Accuracy: 0.9503


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 30/97 - Loss: 0.8612 - Accuracy: 0.9732


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.3768 - Val Accuracy: 0.9639


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 31/97 - Loss: 0.8120 - Accuracy: 0.9767


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.3654 - Val Accuracy: 0.9579


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 32/97 - Loss: 0.8687 - Accuracy: 0.9717


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.4096 - Val Accuracy: 0.9535


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 33/97 - Loss: 0.7907 - Accuracy: 0.9764


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.3390 - Val Accuracy: 0.9606


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 34/97 - Loss: 0.8228 - Accuracy: 0.9760


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.3112 - Val Accuracy: 0.9622


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 35/97 - Loss: 0.7759 - Accuracy: 0.9758


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.4306 - Val Accuracy: 0.9611


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 36/97 - Loss: 0.8060 - Accuracy: 0.9771


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.2759 - Val Accuracy: 0.9628


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 37/97 - Loss: 0.8245 - Accuracy: 0.9734


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.5401 - Val Accuracy: 0.9610


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 38/97 - Loss: 0.8101 - Accuracy: 0.9754


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.3813 - Val Accuracy: 0.9622


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 39/97 - Loss: 0.7829 - Accuracy: 0.9765


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.4059 - Val Accuracy: 0.9494


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 40/97 - Loss: 0.7820 - Accuracy: 0.9763


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.6682 - Val Accuracy: 0.9524


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 41/97 - Loss: 0.7943 - Accuracy: 0.9762


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.4470 - Val Accuracy: 0.9638


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 42/97 - Loss: 0.8024 - Accuracy: 0.9765


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.4167 - Val Accuracy: 0.9628


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 43/97 - Loss: 0.7681 - Accuracy: 0.9775


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.4062 - Val Accuracy: 0.9622


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 44/97 - Loss: 0.7819 - Accuracy: 0.9777


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.3283 - Val Accuracy: 0.9651


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 45/97 - Loss: 0.7753 - Accuracy: 0.9781


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.4438 - Val Accuracy: 0.9628


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 46/97 - Loss: 0.7872 - Accuracy: 0.9766


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.3410 - Val Accuracy: 0.9598


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 47/97 - Loss: 0.8406 - Accuracy: 0.9776


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.7062 - Val Accuracy: 0.9639


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 48/97 - Loss: 0.7856 - Accuracy: 0.9773


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.3901 - Val Accuracy: 0.9619


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 49/97 - Loss: 0.7885 - Accuracy: 0.9789


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.3417 - Val Accuracy: 0.9610


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 50/97 - Loss: 0.7487 - Accuracy: 0.9776


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.4283 - Val Accuracy: 0.9645


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 51/97 - Loss: 0.7892 - Accuracy: 0.9774


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.3811 - Val Accuracy: 0.9628


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 52/97 - Loss: 0.7839 - Accuracy: 0.9780


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.3827 - Val Accuracy: 0.9669


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 53/97 - Loss: 0.7902 - Accuracy: 0.9782


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.5354 - Val Accuracy: 0.9490


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 54/97 - Loss: 0.7878 - Accuracy: 0.9767


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.3843 - Val Accuracy: 0.9639


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 55/97 - Loss: 0.7874 - Accuracy: 0.9761


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.4424 - Val Accuracy: 0.9603


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 56/97 - Loss: 0.8578 - Accuracy: 0.9762


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.4013 - Val Accuracy: 0.9634


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 57/97 - Loss: 0.7867 - Accuracy: 0.9766


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.4497 - Val Accuracy: 0.9587


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 58/97 - Loss: 0.8332 - Accuracy: 0.9749


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.5032 - Val Accuracy: 0.9634


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 59/97 - Loss: 0.7875 - Accuracy: 0.9783


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.4430 - Val Accuracy: 0.9565


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 60/97 - Loss: 0.8015 - Accuracy: 0.9774


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.4868 - Val Accuracy: 0.9634


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 61/97 - Loss: 0.7836 - Accuracy: 0.9803


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.4108 - Val Accuracy: 0.9615


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 62/97 - Loss: 0.7720 - Accuracy: 0.9770


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.4807 - Val Accuracy: 0.9638


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 63/97 - Loss: 0.7604 - Accuracy: 0.9782


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.3941 - Val Accuracy: 0.9649


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 64/97 - Loss: 0.7380 - Accuracy: 0.9773


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.4988 - Val Accuracy: 0.9628


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 65/97 - Loss: 0.8695 - Accuracy: 0.9790


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.5207 - Val Accuracy: 0.9651


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 66/97 - Loss: 0.7721 - Accuracy: 0.9794


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.4338 - Val Accuracy: 0.9628


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 67/97 - Loss: 0.7461 - Accuracy: 0.9793


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.5467 - Val Accuracy: 0.9542


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 68/97 - Loss: 0.7590 - Accuracy: 0.9781


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.3897 - Val Accuracy: 0.9639


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 69/97 - Loss: 0.7196 - Accuracy: 0.9780


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.4110 - Val Accuracy: 0.9639


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 70/97 - Loss: 0.7310 - Accuracy: 0.9788


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.4914 - Val Accuracy: 0.9598


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 71/97 - Loss: 0.7203 - Accuracy: 0.9800


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.4839 - Val Accuracy: 0.9600


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 72/97 - Loss: 0.6927 - Accuracy: 0.9773


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.4534 - Val Accuracy: 0.9615


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 73/97 - Loss: 0.7545 - Accuracy: 0.9779


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.4236 - Val Accuracy: 0.9639


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 74/97 - Loss: 0.7169 - Accuracy: 0.9790


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.4960 - Val Accuracy: 0.9627


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 75/97 - Loss: 0.7217 - Accuracy: 0.9782


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.5787 - Val Accuracy: 0.9621


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 76/97 - Loss: 0.7508 - Accuracy: 0.9749


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.5817 - Val Accuracy: 0.9484


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 77/97 - Loss: 0.7076 - Accuracy: 0.9746


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.4619 - Val Accuracy: 0.9632


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 78/97 - Loss: 0.7145 - Accuracy: 0.9800


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.4328 - Val Accuracy: 0.9655


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 79/97 - Loss: 0.7049 - Accuracy: 0.9794


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.4269 - Val Accuracy: 0.9604


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 80/97 - Loss: 0.7329 - Accuracy: 0.9780


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.5323 - Val Accuracy: 0.9610


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 81/97 - Loss: 0.6965 - Accuracy: 0.9766


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.3451 - Val Accuracy: 0.9628


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 82/97 - Loss: 0.6920 - Accuracy: 0.9785


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.4824 - Val Accuracy: 0.9610


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 83/97 - Loss: 0.6817 - Accuracy: 0.9790


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.3721 - Val Accuracy: 0.9634


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 84/97 - Loss: 0.6955 - Accuracy: 0.9790


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.5317 - Val Accuracy: 0.9610


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 85/97 - Loss: 0.7213 - Accuracy: 0.9775


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.5131 - Val Accuracy: 0.9587


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 86/97 - Loss: 0.6572 - Accuracy: 0.9802


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.4565 - Val Accuracy: 0.9604


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 87/97 - Loss: 0.6601 - Accuracy: 0.9779


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.5062 - Val Accuracy: 0.9655


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 88/97 - Loss: 0.7133 - Accuracy: 0.9771


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.2851 - Val Accuracy: 0.9591


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 89/97 - Loss: 0.6781 - Accuracy: 0.9806


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.4303 - Val Accuracy: 0.9611


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 90/97 - Loss: 0.7303 - Accuracy: 0.9765


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.4535 - Val Accuracy: 0.9572


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 91/97 - Loss: 0.7081 - Accuracy: 0.9762


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.5735 - Val Accuracy: 0.9593


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 92/97 - Loss: 0.6980 - Accuracy: 0.9768


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.5825 - Val Accuracy: 0.9525


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 93/97 - Loss: 0.6418 - Accuracy: 0.9802


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.5727 - Val Accuracy: 0.9611


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 94/97 - Loss: 0.6353 - Accuracy: 0.9811


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.6252 - Val Accuracy: 0.9614


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 95/97 - Loss: 0.6487 - Accuracy: 0.9792


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.6126 - Val Accuracy: 0.9627


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 96/97 - Loss: 0.6141 - Accuracy: 0.9818


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 1.6012 - Val Accuracy: 0.9639


Training:   0%|          | 0/95 [00:00<?, ?it/s]

Epoch: 97/97 - Loss: 0.6659 - Accuracy: 0.9788


  0%|          | 0/12 [00:00<?, ?it/s]

[32m[I 2023-12-08 01:42:40,222][0m Trial 11 finished with value: 0.9592766165733337 and parameters: {'loss_learning_rate': 0.00010541490185043079, 'learning_rate': 0.0015422779855132805, 'weight_decay': 0.007201232402516741, 'epsilon': 2.3282310447997555e-08, 'batch_size': 148, 'epochs': 97}. Best is trial 8 with value: 0.9650763273239136.[0m


Val Loss: 1.5203 - Val Accuracy: 0.9593
Learning rate for Loss: 0.0011183571128045792
Learning rate: 0.00046565910863163146
Weight decay: 0.00041985539911469573
Epsilon: 1.3174033890686761e-09
Batch size: 159
Number of epochs: 34


Epochs:   0%|          | 0/34 [00:00<?, ?it/s]

Training:   0%|          | 0/88 [00:00<?, ?it/s]

Epoch: 1/34 - Loss: 6.9851 - Accuracy: 0.8980


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 2.5381 - Val Accuracy: 0.9507


Training:   0%|          | 0/88 [00:00<?, ?it/s]

Epoch: 2/34 - Loss: 2.2794 - Accuracy: 0.9556


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 1.9344 - Val Accuracy: 0.9610


Training:   0%|          | 0/88 [00:00<?, ?it/s]

Epoch: 3/34 - Loss: 1.7514 - Accuracy: 0.9606


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 1.6304 - Val Accuracy: 0.9599


Training:   0%|          | 0/88 [00:00<?, ?it/s]

Epoch: 4/34 - Loss: 1.5034 - Accuracy: 0.9610


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 1.4735 - Val Accuracy: 0.9616


Training:   0%|          | 0/88 [00:00<?, ?it/s]

Epoch: 5/34 - Loss: 1.3644 - Accuracy: 0.9655


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 1.4562 - Val Accuracy: 0.9605


Training:   0%|          | 0/88 [00:00<?, ?it/s]

Epoch: 6/34 - Loss: 1.3306 - Accuracy: 0.9664


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 1.4912 - Val Accuracy: 0.9559


Training:   0%|          | 0/88 [00:00<?, ?it/s]

Epoch: 7/34 - Loss: 1.3181 - Accuracy: 0.9647


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 1.4634 - Val Accuracy: 0.9576


Training:   0%|          | 0/88 [00:00<?, ?it/s]

Epoch: 8/34 - Loss: 1.2471 - Accuracy: 0.9696


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 1.4262 - Val Accuracy: 0.9605


Training:   0%|          | 0/88 [00:00<?, ?it/s]

Epoch: 9/34 - Loss: 1.1614 - Accuracy: 0.9704


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 1.3647 - Val Accuracy: 0.9605


Training:   0%|          | 0/88 [00:00<?, ?it/s]

Epoch: 10/34 - Loss: 1.1072 - Accuracy: 0.9684


  0%|          | 0/11 [00:00<?, ?it/s]

[32m[I 2023-12-08 01:44:56,495][0m Trial 12 pruned. [0m


Val Loss: 1.4163 - Val Accuracy: 0.9547
Learning rate for Loss: 0.00012094954207862317
Learning rate: 4.3815174723971e-05
Weight decay: 0.0071465488778564325
Epsilon: 3.584843766193412e-09
Batch size: 287
Number of epochs: 60


Epochs:   0%|          | 0/60 [00:00<?, ?it/s]

Training:   0%|          | 0/49 [00:00<?, ?it/s]

Epoch: 1/60 - Loss: 23.7821 - Accuracy: 0.6564


  0%|          | 0/7 [00:00<?, ?it/s]

[32m[I 2023-12-08 01:45:11,611][0m Trial 13 pruned. [0m


Val Loss: 17.9591 - Val Accuracy: 0.7898
Learning rate for Loss: 0.00041072702233964386
Learning rate: 0.0014334814032043784
Weight decay: 0.0004635056750422466
Epsilon: 4.066246475865317e-09
Batch size: 186
Number of epochs: 34


Epochs:   0%|          | 0/34 [00:00<?, ?it/s]

Training:   0%|          | 0/76 [00:00<?, ?it/s]

Epoch: 1/34 - Loss: 5.6324 - Accuracy: 0.8877


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.2523 - Val Accuracy: 0.9559


Training:   0%|          | 0/76 [00:00<?, ?it/s]

Epoch: 2/34 - Loss: 2.1219 - Accuracy: 0.9528


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.9798 - Val Accuracy: 0.9505


Training:   0%|          | 0/76 [00:00<?, ?it/s]

Epoch: 3/34 - Loss: 1.7874 - Accuracy: 0.9552


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.7248 - Val Accuracy: 0.9565


Training:   0%|          | 0/76 [00:00<?, ?it/s]

Epoch: 4/34 - Loss: 1.5007 - Accuracy: 0.9625


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.6372 - Val Accuracy: 0.9587


Training:   0%|          | 0/76 [00:00<?, ?it/s]

Epoch: 5/34 - Loss: 1.3728 - Accuracy: 0.9641


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.5237 - Val Accuracy: 0.9619


Training:   0%|          | 0/76 [00:00<?, ?it/s]

Epoch: 6/34 - Loss: 1.2821 - Accuracy: 0.9626


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.7021 - Val Accuracy: 0.9460


Training:   0%|          | 0/76 [00:00<?, ?it/s]

Epoch: 7/34 - Loss: 1.2781 - Accuracy: 0.9620


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.4834 - Val Accuracy: 0.9619


Training:   0%|          | 0/76 [00:00<?, ?it/s]

Epoch: 8/34 - Loss: 1.1959 - Accuracy: 0.9645


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.3531 - Val Accuracy: 0.9600


Training:   0%|          | 0/76 [00:00<?, ?it/s]

Epoch: 9/34 - Loss: 1.0690 - Accuracy: 0.9693


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.5238 - Val Accuracy: 0.9589


Training:   0%|          | 0/76 [00:00<?, ?it/s]

Epoch: 10/34 - Loss: 1.1247 - Accuracy: 0.9694


  0%|          | 0/10 [00:00<?, ?it/s]

[32m[I 2023-12-08 01:47:30,237][0m Trial 14 pruned. [0m


Val Loss: 1.3765 - Val Accuracy: 0.9548
Learning rate for Loss: 0.0017991353418715592
Learning rate: 0.00010392554500142659
Weight decay: 0.00022391260301509827
Epsilon: 2.8503521362335996e-08
Batch size: 252
Number of epochs: 33


Epochs:   0%|          | 0/33 [00:00<?, ?it/s]

Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 1/33 - Loss: 17.3387 - Accuracy: 0.7356


  0%|          | 0/7 [00:00<?, ?it/s]

[32m[I 2023-12-08 01:47:43,754][0m Trial 15 pruned. [0m


Val Loss: 8.5609 - Val Accuracy: 0.9243
Learning rate for Loss: 0.0005600636054487386
Learning rate: 0.0006893130008138685
Weight decay: 0.0003284487972257048
Epsilon: 1.0328837742816255e-09
Batch size: 127
Number of epochs: 56


Epochs:   0%|          | 0/56 [00:00<?, ?it/s]

Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 1/56 - Loss: 5.3010 - Accuracy: 0.9140


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 2.1827 - Val Accuracy: 0.9476


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 2/56 - Loss: 2.0120 - Accuracy: 0.9549


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.8165 - Val Accuracy: 0.9553


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 3/56 - Loss: 1.8017 - Accuracy: 0.9549


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.6186 - Val Accuracy: 0.9627


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 4/56 - Loss: 1.6130 - Accuracy: 0.9528


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.5069 - Val Accuracy: 0.9540


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 5/56 - Loss: 1.3192 - Accuracy: 0.9620


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.3542 - Val Accuracy: 0.9600


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 6/56 - Loss: 1.3322 - Accuracy: 0.9633


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.4862 - Val Accuracy: 0.9394


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 7/56 - Loss: 1.3195 - Accuracy: 0.9582


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.4115 - Val Accuracy: 0.9636


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 8/56 - Loss: 1.2171 - Accuracy: 0.9686


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.4172 - Val Accuracy: 0.9612


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 9/56 - Loss: 1.1841 - Accuracy: 0.9699


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.4119 - Val Accuracy: 0.9606


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 10/56 - Loss: 1.2088 - Accuracy: 0.9710


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.3689 - Val Accuracy: 0.9627


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 11/56 - Loss: 1.0853 - Accuracy: 0.9728


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.3592 - Val Accuracy: 0.9632


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 12/56 - Loss: 1.0827 - Accuracy: 0.9718


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.4741 - Val Accuracy: 0.9542


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 13/56 - Loss: 1.0567 - Accuracy: 0.9715


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.2593 - Val Accuracy: 0.9643


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 14/56 - Loss: 1.0245 - Accuracy: 0.9714


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.3304 - Val Accuracy: 0.9640


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 15/56 - Loss: 1.0276 - Accuracy: 0.9725


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.2846 - Val Accuracy: 0.9628


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 16/56 - Loss: 0.9899 - Accuracy: 0.9728


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.3055 - Val Accuracy: 0.9612


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 17/56 - Loss: 0.9642 - Accuracy: 0.9728


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.3340 - Val Accuracy: 0.9596


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 18/56 - Loss: 0.9917 - Accuracy: 0.9707


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.3707 - Val Accuracy: 0.9478


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 19/56 - Loss: 1.0829 - Accuracy: 0.9694


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.3308 - Val Accuracy: 0.9625


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 20/56 - Loss: 0.9949 - Accuracy: 0.9740


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.3457 - Val Accuracy: 0.9602


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 21/56 - Loss: 0.9455 - Accuracy: 0.9721


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.4490 - Val Accuracy: 0.9540


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 22/56 - Loss: 0.9595 - Accuracy: 0.9742


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.3461 - Val Accuracy: 0.9602


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 23/56 - Loss: 0.9871 - Accuracy: 0.9733


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.5346 - Val Accuracy: 0.9550


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 24/56 - Loss: 0.9378 - Accuracy: 0.9739


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.2798 - Val Accuracy: 0.9625


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 25/56 - Loss: 0.9463 - Accuracy: 0.9720


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.3341 - Val Accuracy: 0.9621


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 26/56 - Loss: 0.8895 - Accuracy: 0.9772


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.3962 - Val Accuracy: 0.9598


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 27/56 - Loss: 0.8914 - Accuracy: 0.9748


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.4120 - Val Accuracy: 0.9610


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 28/56 - Loss: 0.9706 - Accuracy: 0.9721


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.4542 - Val Accuracy: 0.9386


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 29/56 - Loss: 1.0583 - Accuracy: 0.9638


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.2176 - Val Accuracy: 0.9640


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 30/56 - Loss: 0.9123 - Accuracy: 0.9766


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.4979 - Val Accuracy: 0.9585


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 31/56 - Loss: 1.0026 - Accuracy: 0.9753


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.3281 - Val Accuracy: 0.9628


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 32/56 - Loss: 0.8531 - Accuracy: 0.9764


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.2617 - Val Accuracy: 0.9641


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 33/56 - Loss: 0.8038 - Accuracy: 0.9786


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.4086 - Val Accuracy: 0.9638


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 34/56 - Loss: 0.8623 - Accuracy: 0.9756


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.2897 - Val Accuracy: 0.9615


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 35/56 - Loss: 0.8889 - Accuracy: 0.9734


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.2846 - Val Accuracy: 0.9666


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 36/56 - Loss: 0.7934 - Accuracy: 0.9782


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.2297 - Val Accuracy: 0.9615


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 37/56 - Loss: 0.8440 - Accuracy: 0.9760


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.3026 - Val Accuracy: 0.9627


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 38/56 - Loss: 0.8460 - Accuracy: 0.9774


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.3103 - Val Accuracy: 0.9623


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 39/56 - Loss: 0.8335 - Accuracy: 0.9734


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.2848 - Val Accuracy: 0.9632


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 40/56 - Loss: 0.7577 - Accuracy: 0.9790


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.3127 - Val Accuracy: 0.9625


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 41/56 - Loss: 0.7991 - Accuracy: 0.9762


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.3986 - Val Accuracy: 0.9623


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 42/56 - Loss: 0.8059 - Accuracy: 0.9764


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.2683 - Val Accuracy: 0.9610


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 43/56 - Loss: 0.7891 - Accuracy: 0.9777


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.5093 - Val Accuracy: 0.9632


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 44/56 - Loss: 0.9500 - Accuracy: 0.9769


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.4025 - Val Accuracy: 0.9619


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 45/56 - Loss: 0.7583 - Accuracy: 0.9806


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.3318 - Val Accuracy: 0.9643


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 46/56 - Loss: 0.7551 - Accuracy: 0.9789


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.4137 - Val Accuracy: 0.9621


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 47/56 - Loss: 0.8093 - Accuracy: 0.9772


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.5466 - Val Accuracy: 0.9493


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 48/56 - Loss: 0.8203 - Accuracy: 0.9767


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.3304 - Val Accuracy: 0.9621


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 49/56 - Loss: 0.7743 - Accuracy: 0.9799


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.3803 - Val Accuracy: 0.9627


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 50/56 - Loss: 0.7702 - Accuracy: 0.9792


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.3727 - Val Accuracy: 0.9602


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 51/56 - Loss: 0.7465 - Accuracy: 0.9779


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.4856 - Val Accuracy: 0.9610


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 52/56 - Loss: 0.7215 - Accuracy: 0.9791


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.4362 - Val Accuracy: 0.9632


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 53/56 - Loss: 0.7074 - Accuracy: 0.9797


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.3569 - Val Accuracy: 0.9617


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 54/56 - Loss: 0.7071 - Accuracy: 0.9789


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.3690 - Val Accuracy: 0.9619


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 55/56 - Loss: 0.6873 - Accuracy: 0.9809


  0%|          | 0/14 [00:00<?, ?it/s]

Val Loss: 1.3751 - Val Accuracy: 0.9613


Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 56/56 - Loss: 0.6970 - Accuracy: 0.9794


  0%|          | 0/14 [00:00<?, ?it/s]

[32m[I 2023-12-08 02:00:05,761][0m Trial 16 finished with value: 0.9604024887084961 and parameters: {'loss_learning_rate': 0.0005600636054487386, 'learning_rate': 0.0006893130008138685, 'weight_decay': 0.0003284487972257048, 'epsilon': 1.0328837742816255e-09, 'batch_size': 127, 'epochs': 56}. Best is trial 8 with value: 0.9650763273239136.[0m


Val Loss: 1.5448 - Val Accuracy: 0.9604
Learning rate for Loss: 0.0025820473725166253
Learning rate: 1.716080943527789e-05
Weight decay: 0.0015089076395223247
Epsilon: 2.942018963378978e-09
Batch size: 220
Number of epochs: 91


Epochs:   0%|          | 0/91 [00:00<?, ?it/s]

Training:   0%|          | 0/64 [00:00<?, ?it/s]

Epoch: 1/91 - Loss: 26.4611 - Accuracy: 0.5439


  0%|          | 0/8 [00:00<?, ?it/s]

[32m[I 2023-12-08 02:00:20,910][0m Trial 17 pruned. [0m


Val Loss: 21.6317 - Val Accuracy: 0.5946
Learning rate for Loss: 0.00016318974117700738
Learning rate: 0.0034765136577340457
Weight decay: 0.0040660692082720796
Epsilon: 7.965890591470585e-09
Batch size: 271
Number of epochs: 42


Epochs:   0%|          | 0/42 [00:00<?, ?it/s]

Training:   0%|          | 0/52 [00:00<?, ?it/s]

Epoch: 1/42 - Loss: 4.8043 - Accuracy: 0.8855


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 2.1293 - Val Accuracy: 0.9526


Training:   0%|          | 0/52 [00:00<?, ?it/s]

Epoch: 2/42 - Loss: 2.0199 - Accuracy: 0.9496


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 1.9532 - Val Accuracy: 0.9609


Training:   0%|          | 0/52 [00:00<?, ?it/s]

Epoch: 3/42 - Loss: 1.7272 - Accuracy: 0.9600


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 1.8397 - Val Accuracy: 0.9632


Training:   0%|          | 0/52 [00:00<?, ?it/s]

Epoch: 4/42 - Loss: 1.6073 - Accuracy: 0.9609


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 2.1562 - Val Accuracy: 0.9225


Training:   0%|          | 0/52 [00:00<?, ?it/s]

Epoch: 5/42 - Loss: 1.5849 - Accuracy: 0.9601


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 1.8498 - Val Accuracy: 0.9469


Training:   0%|          | 0/52 [00:00<?, ?it/s]

Epoch: 6/42 - Loss: 1.5023 - Accuracy: 0.9613


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 1.7470 - Val Accuracy: 0.9464


Training:   0%|          | 0/52 [00:00<?, ?it/s]

Epoch: 7/42 - Loss: 1.4589 - Accuracy: 0.9572


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 2.0990 - Val Accuracy: 0.9394


Training:   0%|          | 0/52 [00:00<?, ?it/s]

Epoch: 8/42 - Loss: 1.4004 - Accuracy: 0.9610


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 1.5950 - Val Accuracy: 0.9604


Training:   0%|          | 0/52 [00:00<?, ?it/s]

Epoch: 9/42 - Loss: 1.2468 - Accuracy: 0.9676


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 1.5680 - Val Accuracy: 0.9492


Training:   0%|          | 0/52 [00:00<?, ?it/s]

Epoch: 10/42 - Loss: 1.1738 - Accuracy: 0.9685


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 1.5585 - Val Accuracy: 0.9615


Training:   0%|          | 0/52 [00:00<?, ?it/s]

Epoch: 11/42 - Loss: 1.1608 - Accuracy: 0.9682


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 1.6804 - Val Accuracy: 0.9611


Training:   0%|          | 0/52 [00:00<?, ?it/s]

Epoch: 12/42 - Loss: 1.1906 - Accuracy: 0.9675


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 1.6130 - Val Accuracy: 0.9608


Training:   0%|          | 0/52 [00:00<?, ?it/s]

Epoch: 13/42 - Loss: 1.2324 - Accuracy: 0.9614


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 1.7785 - Val Accuracy: 0.9545


Training:   0%|          | 0/52 [00:00<?, ?it/s]

Epoch: 14/42 - Loss: 1.1085 - Accuracy: 0.9700


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 1.5620 - Val Accuracy: 0.9625


Training:   0%|          | 0/52 [00:00<?, ?it/s]

Epoch: 15/42 - Loss: 1.1265 - Accuracy: 0.9662


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 1.6083 - Val Accuracy: 0.9615


Training:   0%|          | 0/52 [00:00<?, ?it/s]

Epoch: 16/42 - Loss: 1.0847 - Accuracy: 0.9692


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 1.8131 - Val Accuracy: 0.9341


Training:   0%|          | 0/52 [00:00<?, ?it/s]

Epoch: 17/42 - Loss: 1.0791 - Accuracy: 0.9664


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 1.4249 - Val Accuracy: 0.9594


Training:   0%|          | 0/52 [00:00<?, ?it/s]

Epoch: 18/42 - Loss: 1.0321 - Accuracy: 0.9685


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 1.6565 - Val Accuracy: 0.9572


Training:   0%|          | 0/52 [00:00<?, ?it/s]

Epoch: 19/42 - Loss: 0.9629 - Accuracy: 0.9735


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 1.7234 - Val Accuracy: 0.9608


Training:   0%|          | 0/52 [00:00<?, ?it/s]

Epoch: 20/42 - Loss: 1.0656 - Accuracy: 0.9657


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 1.4630 - Val Accuracy: 0.9550


Training:   0%|          | 0/52 [00:00<?, ?it/s]

Epoch: 21/42 - Loss: 0.9681 - Accuracy: 0.9694


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 1.3881 - Val Accuracy: 0.9652


Training:   0%|          | 0/52 [00:00<?, ?it/s]

Epoch: 22/42 - Loss: 0.9499 - Accuracy: 0.9704


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 1.4719 - Val Accuracy: 0.9632


Training:   0%|          | 0/52 [00:00<?, ?it/s]

Epoch: 23/42 - Loss: 0.9707 - Accuracy: 0.9717


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 1.6535 - Val Accuracy: 0.9546


Training:   0%|          | 0/52 [00:00<?, ?it/s]

Epoch: 24/42 - Loss: 0.9663 - Accuracy: 0.9682


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 1.7836 - Val Accuracy: 0.9396


Training:   0%|          | 0/52 [00:00<?, ?it/s]

Epoch: 25/42 - Loss: 0.9551 - Accuracy: 0.9689


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 1.4315 - Val Accuracy: 0.9603


Training:   0%|          | 0/52 [00:00<?, ?it/s]

Epoch: 26/42 - Loss: 0.8625 - Accuracy: 0.9744


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 1.4758 - Val Accuracy: 0.9571


Training:   0%|          | 0/52 [00:00<?, ?it/s]

Epoch: 27/42 - Loss: 0.9348 - Accuracy: 0.9705


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 1.6038 - Val Accuracy: 0.9475


Training:   0%|          | 0/52 [00:00<?, ?it/s]

Epoch: 28/42 - Loss: 0.8816 - Accuracy: 0.9742


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 1.3930 - Val Accuracy: 0.9630


Training:   0%|          | 0/52 [00:00<?, ?it/s]

Epoch: 29/42 - Loss: 0.8159 - Accuracy: 0.9746


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 1.5299 - Val Accuracy: 0.9625


Training:   0%|          | 0/52 [00:00<?, ?it/s]

Epoch: 30/42 - Loss: 0.7854 - Accuracy: 0.9747


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 1.4513 - Val Accuracy: 0.9594


Training:   0%|          | 0/52 [00:00<?, ?it/s]

Epoch: 31/42 - Loss: 0.8548 - Accuracy: 0.9724


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 1.4290 - Val Accuracy: 0.9603


Training:   0%|          | 0/52 [00:00<?, ?it/s]

Epoch: 32/42 - Loss: 0.8112 - Accuracy: 0.9748


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 1.3943 - Val Accuracy: 0.9641


Training:   0%|          | 0/52 [00:00<?, ?it/s]

Epoch: 33/42 - Loss: 0.8312 - Accuracy: 0.9724


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 1.4963 - Val Accuracy: 0.9604


Training:   0%|          | 0/52 [00:00<?, ?it/s]

Epoch: 34/42 - Loss: 0.7912 - Accuracy: 0.9784


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 1.4635 - Val Accuracy: 0.9599


Training:   0%|          | 0/52 [00:00<?, ?it/s]

Epoch: 35/42 - Loss: 0.7909 - Accuracy: 0.9732


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 1.4369 - Val Accuracy: 0.9560


Training:   0%|          | 0/52 [00:00<?, ?it/s]

Epoch: 36/42 - Loss: 0.8111 - Accuracy: 0.9736


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 1.4309 - Val Accuracy: 0.9570


Training:   0%|          | 0/52 [00:00<?, ?it/s]

Epoch: 37/42 - Loss: 0.7846 - Accuracy: 0.9741


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 1.4181 - Val Accuracy: 0.9624


Training:   0%|          | 0/52 [00:00<?, ?it/s]

Epoch: 38/42 - Loss: 0.7679 - Accuracy: 0.9740


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 1.4697 - Val Accuracy: 0.9562


Training:   0%|          | 0/52 [00:00<?, ?it/s]

Epoch: 39/42 - Loss: 0.7568 - Accuracy: 0.9763


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 1.5144 - Val Accuracy: 0.9636


Training:   0%|          | 0/52 [00:00<?, ?it/s]

Epoch: 40/42 - Loss: 0.7733 - Accuracy: 0.9742


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 1.3564 - Val Accuracy: 0.9645


Training:   0%|          | 0/52 [00:00<?, ?it/s]

Epoch: 41/42 - Loss: 0.7502 - Accuracy: 0.9780


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 1.3960 - Val Accuracy: 0.9646


Training:   0%|          | 0/52 [00:00<?, ?it/s]

Epoch: 42/42 - Loss: 0.7397 - Accuracy: 0.9776


  0%|          | 0/7 [00:00<?, ?it/s]

[32m[I 2023-12-08 02:09:59,849][0m Trial 18 finished with value: 0.9630420804023743 and parameters: {'loss_learning_rate': 0.00016318974117700738, 'learning_rate': 0.0034765136577340457, 'weight_decay': 0.0040660692082720796, 'epsilon': 7.965890591470585e-09, 'batch_size': 271, 'epochs': 42}. Best is trial 8 with value: 0.9650763273239136.[0m


Val Loss: 1.4566 - Val Accuracy: 0.9630
Learning rate for Loss: 0.00022291416620514315
Learning rate: 0.00019357356993332232
Weight decay: 0.005177366334329708
Epsilon: 2.442116266212139e-08
Batch size: 177
Number of epochs: 70


Epochs:   0%|          | 0/70 [00:00<?, ?it/s]

Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 1/70 - Loss: 11.0146 - Accuracy: 0.8513


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 4.0108 - Val Accuracy: 0.9485


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 2/70 - Loss: 3.4952 - Accuracy: 0.9464


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.7865 - Val Accuracy: 0.9589


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 3/70 - Loss: 2.6580 - Accuracy: 0.9544


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.4195 - Val Accuracy: 0.9588


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 4/70 - Loss: 2.2784 - Accuracy: 0.9598


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.2331 - Val Accuracy: 0.9609


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 5/70 - Loss: 2.0404 - Accuracy: 0.9627


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 2.1167 - Val Accuracy: 0.9571


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 6/70 - Loss: 1.8761 - Accuracy: 0.9659


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.9739 - Val Accuracy: 0.9593


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 7/70 - Loss: 1.7432 - Accuracy: 0.9664


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.9083 - Val Accuracy: 0.9588


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 8/70 - Loss: 1.6272 - Accuracy: 0.9673


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.7994 - Val Accuracy: 0.9622


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 9/70 - Loss: 1.5535 - Accuracy: 0.9691


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.7454 - Val Accuracy: 0.9634


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 10/70 - Loss: 1.4605 - Accuracy: 0.9690


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.7275 - Val Accuracy: 0.9622


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 11/70 - Loss: 1.3925 - Accuracy: 0.9694


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.6459 - Val Accuracy: 0.9637


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 12/70 - Loss: 1.3239 - Accuracy: 0.9703


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.5955 - Val Accuracy: 0.9657


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 13/70 - Loss: 1.2764 - Accuracy: 0.9717


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.6451 - Val Accuracy: 0.9592


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 14/70 - Loss: 1.2473 - Accuracy: 0.9695


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.5159 - Val Accuracy: 0.9633


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 15/70 - Loss: 1.1723 - Accuracy: 0.9718


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.5158 - Val Accuracy: 0.9663


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 16/70 - Loss: 1.1506 - Accuracy: 0.9715


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.4771 - Val Accuracy: 0.9650


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 17/70 - Loss: 1.1172 - Accuracy: 0.9708


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.4679 - Val Accuracy: 0.9625


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 18/70 - Loss: 1.0887 - Accuracy: 0.9717


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.4038 - Val Accuracy: 0.9635


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 19/70 - Loss: 1.1072 - Accuracy: 0.9687


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.4154 - Val Accuracy: 0.9615


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 20/70 - Loss: 1.0200 - Accuracy: 0.9733


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.3695 - Val Accuracy: 0.9644


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 21/70 - Loss: 0.9954 - Accuracy: 0.9740


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.3874 - Val Accuracy: 0.9621


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 22/70 - Loss: 0.9945 - Accuracy: 0.9744


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.4389 - Val Accuracy: 0.9632


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 23/70 - Loss: 0.9698 - Accuracy: 0.9741


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.3774 - Val Accuracy: 0.9633


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 24/70 - Loss: 0.9882 - Accuracy: 0.9714


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.3777 - Val Accuracy: 0.9634


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 25/70 - Loss: 0.9384 - Accuracy: 0.9742


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.3770 - Val Accuracy: 0.9620


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 26/70 - Loss: 0.9269 - Accuracy: 0.9756


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.3458 - Val Accuracy: 0.9620


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 27/70 - Loss: 0.9230 - Accuracy: 0.9752


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.3427 - Val Accuracy: 0.9622


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 28/70 - Loss: 0.9140 - Accuracy: 0.9758


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.3634 - Val Accuracy: 0.9618


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 29/70 - Loss: 0.9295 - Accuracy: 0.9729


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.3575 - Val Accuracy: 0.9633


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 30/70 - Loss: 0.9000 - Accuracy: 0.9764


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.4009 - Val Accuracy: 0.9604


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 31/70 - Loss: 0.9102 - Accuracy: 0.9753


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.4008 - Val Accuracy: 0.9593


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 32/70 - Loss: 0.9152 - Accuracy: 0.9751


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.3490 - Val Accuracy: 0.9650


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 33/70 - Loss: 0.8807 - Accuracy: 0.9768


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.3161 - Val Accuracy: 0.9621


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 34/70 - Loss: 0.8804 - Accuracy: 0.9773


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.3742 - Val Accuracy: 0.9616


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 35/70 - Loss: 0.8927 - Accuracy: 0.9767


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.3550 - Val Accuracy: 0.9621


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 36/70 - Loss: 0.8740 - Accuracy: 0.9772


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.3623 - Val Accuracy: 0.9651


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 37/70 - Loss: 0.8711 - Accuracy: 0.9782


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.3554 - Val Accuracy: 0.9650


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 38/70 - Loss: 0.8665 - Accuracy: 0.9764


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.3835 - Val Accuracy: 0.9616


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 39/70 - Loss: 0.8658 - Accuracy: 0.9781


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.3627 - Val Accuracy: 0.9644


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 40/70 - Loss: 0.8497 - Accuracy: 0.9777


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.3903 - Val Accuracy: 0.9646


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 41/70 - Loss: 0.8553 - Accuracy: 0.9787


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.3639 - Val Accuracy: 0.9651


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 42/70 - Loss: 0.8449 - Accuracy: 0.9775


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.4009 - Val Accuracy: 0.9651


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 43/70 - Loss: 0.8330 - Accuracy: 0.9786


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.3800 - Val Accuracy: 0.9608


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 44/70 - Loss: 0.8115 - Accuracy: 0.9787


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.4458 - Val Accuracy: 0.9608


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 45/70 - Loss: 0.8114 - Accuracy: 0.9789


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.3875 - Val Accuracy: 0.9645


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 46/70 - Loss: 0.7926 - Accuracy: 0.9797


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.3836 - Val Accuracy: 0.9645


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 47/70 - Loss: 0.8439 - Accuracy: 0.9776


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.3321 - Val Accuracy: 0.9627


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 48/70 - Loss: 0.8026 - Accuracy: 0.9784


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.3798 - Val Accuracy: 0.9628


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 49/70 - Loss: 0.7674 - Accuracy: 0.9788


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.3722 - Val Accuracy: 0.9632


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 50/70 - Loss: 0.7596 - Accuracy: 0.9788


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.4554 - Val Accuracy: 0.9626


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 51/70 - Loss: 0.7620 - Accuracy: 0.9793


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.3617 - Val Accuracy: 0.9644


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 52/70 - Loss: 0.7780 - Accuracy: 0.9779


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.3986 - Val Accuracy: 0.9606


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 53/70 - Loss: 0.7271 - Accuracy: 0.9797


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.4654 - Val Accuracy: 0.9610


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 54/70 - Loss: 0.7259 - Accuracy: 0.9795


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.4513 - Val Accuracy: 0.9627


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 55/70 - Loss: 0.7313 - Accuracy: 0.9794


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.3789 - Val Accuracy: 0.9645


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 56/70 - Loss: 0.7493 - Accuracy: 0.9787


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.3729 - Val Accuracy: 0.9649


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 57/70 - Loss: 0.7200 - Accuracy: 0.9795


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.4008 - Val Accuracy: 0.9645


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 58/70 - Loss: 0.7103 - Accuracy: 0.9792


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.4150 - Val Accuracy: 0.9640


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 59/70 - Loss: 0.7100 - Accuracy: 0.9803


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.3684 - Val Accuracy: 0.9639


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 60/70 - Loss: 0.6831 - Accuracy: 0.9813


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.4022 - Val Accuracy: 0.9656


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 61/70 - Loss: 0.6903 - Accuracy: 0.9809


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.3951 - Val Accuracy: 0.9627


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 62/70 - Loss: 0.6866 - Accuracy: 0.9805


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.4115 - Val Accuracy: 0.9627


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 63/70 - Loss: 0.6967 - Accuracy: 0.9800


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.4795 - Val Accuracy: 0.9661


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 64/70 - Loss: 0.6829 - Accuracy: 0.9805


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.4308 - Val Accuracy: 0.9628


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 65/70 - Loss: 0.6464 - Accuracy: 0.9805


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.4090 - Val Accuracy: 0.9639


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 66/70 - Loss: 0.6685 - Accuracy: 0.9807


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.3990 - Val Accuracy: 0.9639


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 67/70 - Loss: 0.6909 - Accuracy: 0.9798


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.3720 - Val Accuracy: 0.9651


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 68/70 - Loss: 0.6973 - Accuracy: 0.9795


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.3896 - Val Accuracy: 0.9668


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 69/70 - Loss: 0.6683 - Accuracy: 0.9797


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 1.4601 - Val Accuracy: 0.9638


Training:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 70/70 - Loss: 0.6920 - Accuracy: 0.9797


  0%|          | 0/10 [00:00<?, ?it/s]

[32m[I 2023-12-08 02:25:10,271][0m Trial 19 finished with value: 0.9614963531494141 and parameters: {'loss_learning_rate': 0.00022291416620514315, 'learning_rate': 0.00019357356993332232, 'weight_decay': 0.005177366334329708, 'epsilon': 2.442116266212139e-08, 'batch_size': 177, 'epochs': 70}. Best is trial 8 with value: 0.9650763273239136.[0m


Val Loss: 1.3405 - Val Accuracy: 0.9615
Learning rate for Loss: 0.0006422278978480524
Learning rate: 4.140303768377289e-05
Weight decay: 0.000641805843812785
Epsilon: 1.351306135339276e-08
Batch size: 60
Number of epochs: 26


Epochs:   0%|          | 0/26 [00:00<?, ?it/s]

Training:   0%|          | 0/233 [00:00<?, ?it/s]

Epoch: 1/26 - Loss: 15.0590 - Accuracy: 0.7947


  0%|          | 0/30 [00:00<?, ?it/s]

[32m[I 2023-12-08 02:25:20,319][0m Trial 20 pruned. [0m


Val Loss: 6.7066 - Val Accuracy: 0.9378

Study statistics: 
  Number of finished trials:  21
  Number of pruned trials:  8
  Number of complete trials:  12


In [25]:
print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

Best trial:
  Value:  0.9650763273239136
  Params: 
    batch_size: 176
    epochs: 65
    epsilon: 3.8011270784941285e-08
    learning_rate: 0.00028010062637648576
    loss_learning_rate: 0.000103219676036016
    weight_decay: 0.006448148785603258


In [None]:
# ViT P12-S8 ArcFace Mean

Best trial:
Value:  0.9650763273239136
Params: 
batch_size: 176
epochs: 65
epsilon: 3.8011270784941285e-08
learning_rate: 0.00028010062637648576
loss_learning_rate: 0.000103219676036016
weight_decay: 0.006448148785603258