In [1]:
import os
import random
import pandas as pd
import numpy as np
import mxnet as mx
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as opt
from torch.utils.data import Dataset, DataLoader
from pytorch_metric_learning import losses
from einops import rearrange, repeat
import optuna
from optuna.trial import TrialState
from tqdm.notebook import tqdm

In [2]:
def file_to_embed(embeds, file):
    emb = []
    for f in file:
        emb.append(embeds[f][0])
    return torch.stack(emb)

In [3]:
MIN_NUM_PATCHES = 16

In [4]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

In [5]:
class AdienceDataset(Dataset):
    def __init__(self, annot_file, img_dir, train=False):
        self.img_lbls = pd.read_csv(annot_file, header=None)
        self.img_dir = img_dir
        self.is_train = train
    
    def __len__(self):
        return len(self.img_lbls)
    
    def __getitem__(self, idx):
        img_file = self.img_lbls.iloc[idx, 0]
        img_path = os.path.join(self.img_dir, img_file)
        image = mx.image.imread(img_path)
        if image.shape[1] != 112:
            image = mx.image.resize_short(image, 112)
        image = mx.nd.transpose(image, axes=(2,0,1))
        image = torch.tensor(image.asnumpy()).type(torch.FloatTensor)
        label = self.img_lbls.iloc[idx, 1]
        
        if self.is_train:
            positive_list = self.img_lbls[self.img_lbls.iloc[:, 1] == label].index.values
            positive_list = np.setdiff1d(positive_list, np.array([idx]))
            positive_item = random.choice(positive_list)
            positive_img = self.img_lbls.iloc[positive_item, 0]
            pos_img_path = os.path.join(self.img_dir, positive_img)
            pos_image = mx.image.imread(pos_img_path)
            if pos_image.shape[1] != 112:
                pos_image = mx.image.resize_short(pos_image, 112)
            pos_image = mx.nd.transpose(pos_image, axes=(2,0,1))
            pos_image = torch.tensor(pos_image.asnumpy()).type(torch.FloatTensor)
            
            negative_list = self.img_lbls[self.img_lbls.iloc[:, 1] != label].index.values
            negative_item = random.choice(negative_list)
            negative_img = self.img_lbls.iloc[negative_item, 0]
            neg_img_path = os.path.join(self.img_dir, negative_img)
            neg_image = mx.image.imread(neg_img_path)
            if neg_image.shape[1] != 112:
                neg_image = mx.image.resize_short(neg_image, 112)
            neg_image = mx.nd.transpose(neg_image, axes=(2,0,1))
            neg_image = torch.tensor(neg_image.asnumpy()).type(torch.FloatTensor)
            
            return image, pos_image, neg_image, label, img_file, positive_img, negative_img

        return image, label, img_file

In [6]:
train_data = AdienceDataset("../train.csv", "../cropped_Adience/", train=False)
val_data = AdienceDataset("../val.csv", "../cropped_Adience/", train=False)

In [21]:
train_data = AdienceDataset("../train.csv", "../cropped_Adience/", train=True)
val_data = AdienceDataset("../val.csv", "../cropped_Adience/", train=True)

In [7]:
class TripletLoss(nn.Module):
    def __init__(self, margin=1.0):
        super(TripletLoss, self).__init__()
        self.margin = margin
        
    def calc_euclidean(self, x1, x2):
        return (x1 - x2).pow(2).sum(1)
    
    def forward(self, anchor, positive, negative):
        distance_positive = self.calc_euclidean(anchor, positive)
        distance_negative = self.calc_euclidean(anchor, negative)
        losses = torch.relu(distance_positive - distance_negative + self.margin)

        return losses.mean()

In [8]:
class CombinedLoss(nn.Module):
    def __init__(self, beta=1.0):
        super(CombinedLoss, self).__init__()
        self.beta = beta
        self.triplet = TripletLoss(margin=1.0)
        self.classification = nn.CrossEntropyLoss()
        
    def forward(self, anchor, positive, negative, classification_out, labels):
        triplet_loss = self.triplet(anchor, positive, negative)
        classification_loss = self.classification(classification_out, labels)
        total_loss = (self.beta * triplet_loss) + classification_loss
        
        return total_loss

In [9]:
class CosFace(nn.Module):
    r"""Implement of CosFace (https://arxiv.org/pdf/1801.09414.pdf):
    Args:
        in_features: size of each input sample
        out_features: size of each output sample
        device_id: the ID of GPU where the model will be trained by model parallel.
                       if device_id=None, it will be trained on CPU without model parallel.
        s: norm of input feature
        m: margin
        cos(theta)-m
    """

    def __init__(self, in_features, out_features, device_id, s=64.0, m=0.35):
        super(CosFace, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.device_id = device_id
        self.s = s
        self.m = m
        print("self.device_id", self.device_id)
        self.weight = nn.Parameter(torch.FloatTensor(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)

    def forward(self, input, label):
        # --------------------------- cos(theta) & phi(theta) ---------------------------

        if self.device_id == None:
            cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        else:
            x = input
            sub_weights = torch.chunk(self.weight, len(self.device_id), dim=0)
            temp_x = x.cuda(self.device_id[0])
            weight = sub_weights[0].cuda(self.device_id[0])
            cosine = F.linear(F.normalize(temp_x), F.normalize(weight))
            for i in range(1, len(self.device_id)):
                temp_x = x.cuda(self.device_id[i])
                weight = sub_weights[i].cuda(self.device_id[i])
                cosine = torch.cat((cosine, F.linear(F.normalize(temp_x), F.normalize(weight)).cuda(self.device_id[0])),
                                   dim=1)
        phi = cosine - self.m
        # --------------------------- convert label to one-hot ---------------------------
        one_hot = torch.zeros(cosine.size())
        if self.device_id != None:
            one_hot = one_hot.cuda(self.device_id[0])
        # one_hot = one_hot.cuda() if cosine.is_cuda else one_hot

        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        # -------------torch.where(out_i = {x_i if condition_i else y_i) -------------
        output = (one_hot * phi) + (
                    (1.0 - one_hot) * cosine)  # you can use torch.where if your torch.__version__ is 0.4
        output *= self.s

        return output

    def __repr__(self):
        return self.__class__.__name__ + '(' \
               + 'in_features = ' + str(self.in_features) \
               + ', out_features = ' + str(self.out_features) \
               + ', s = ' + str(self.s) \
               + ', m = ' + str(self.m) + ')'

In [10]:
class Residual(nn.Module):
    def __init__(self, fn):
        super().__init__()
        self.fn = fn
    def forward(self, x, **kwargs):
        return self.fn(x, **kwargs) + x

In [11]:
class PreNorm(nn.Module):
    def __init__(self, dim, fn):
        super().__init__()
        self.norm = nn.LayerNorm(dim)
        self.fn = fn
    def forward(self, x, **kwargs):
        return self.fn(self.norm(x), **kwargs)

In [12]:
class FeedForward(nn.Module):
    def __init__(self, dim, hidden_dim, dropout = 0.):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(dim, hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, dim),
            nn.Dropout(dropout)
        )
    def forward(self, x):
        return self.net(x)

In [13]:
class Attention(nn.Module):
    def __init__(self, dim, heads = 8, dim_head = 64, dropout = 0.):
        super().__init__()
        inner_dim = dim_head *  heads
        self.heads = heads
        self.scale = dim ** -0.5

        self.to_qkv = nn.Linear(dim, inner_dim * 3, bias = False)
        self.to_out = nn.Sequential(
            nn.Linear(inner_dim, dim),
            nn.Dropout(dropout)
        )

    def forward(self, x, mask = None):
        b, n, _, h = *x.shape, self.heads
        qkv = self.to_qkv(x).chunk(3, dim = -1)

        q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> b h n d', h = h), qkv)
        dots = torch.einsum('bhid,bhjd->bhij', q, k) * self.scale
        mask_value = -torch.finfo(dots.dtype).max
        #embed()
        if mask is not None:
            mask = F.pad(mask.flatten(1), (1, 0), value = True)
            assert mask.shape[-1] == dots.shape[-1], 'mask has incorrect dimensions'
            mask = mask[:, None, :] * mask[:, :, None]
            dots.masked_fill_(~mask, mask_value)
            del mask

        attn = dots.softmax(dim=-1)

        out = torch.einsum('bhij,bhjd->bhid', attn, v)
        out = rearrange(out, 'b h n d -> b n (h d)')
        out =  self.to_out(out)

        return out

In [14]:
class Transformer(nn.Module):
    def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout):
        super().__init__()
        self.layers = nn.ModuleList([])
        for _ in range(depth):
            self.layers.append(nn.ModuleList([
                Residual(PreNorm(dim, Attention(dim, heads = heads, dim_head = dim_head, dropout = dropout))),
                Residual(PreNorm(dim, FeedForward(dim, mlp_dim, dropout = dropout)))
            ]))
    def forward(self, x, mask = None):
        for attn, ff in self.layers:
            x = attn(x, mask = mask)
            #embed()
            x = ff(x)
        return x

In [15]:
class ViTs_face(nn.Module):
    def __init__(self, *, loss_type, GPU_ID, num_class, image_size, patch_size, ac_patch_size,
                         pad, dim, depth, heads, mlp_dim, pool = 'mean', channels = 3, dim_head = 64, dropout = 0., emb_dropout = 0.):
        super().__init__()
        assert image_size % patch_size == 0, 'Image dimensions must be divisible by the patch size.'
        num_patches = (image_size // patch_size) ** 2
        patch_dim = channels * ac_patch_size ** 2
        assert num_patches > MIN_NUM_PATCHES, f'your number of patches ({num_patches}) is way too small for attention to be effective (at least 16). Try decreasing your patch size'
        assert pool in {'cls', 'mean'}, 'pool type must be either cls (cls token) or mean (mean pooling)'

        self.patch_size = patch_size
        self.soft_split = nn.Unfold(kernel_size=(ac_patch_size, ac_patch_size), stride=(self.patch_size, self.patch_size), padding=(pad, pad))


        self.pos_embedding = nn.Parameter(torch.randn(1, num_patches + 1, dim))
        self.patch_to_embedding = nn.Linear(patch_dim, dim)
        self.cls_token = nn.Parameter(torch.randn(1, 1, dim))
        self.dropout = nn.Dropout(emb_dropout)

        self.transformer = Transformer(dim, depth, heads, dim_head, mlp_dim, dropout)

        self.pool = pool
        self.to_latent = nn.Identity()

        self.mlp_head = nn.Sequential(
            nn.LayerNorm(dim),
        )
        self.loss_type = loss_type
        self.GPU_ID = GPU_ID
        if self.loss_type == 'None':
            print("no loss for vit_face")
        else:
            if self.loss_type == 'CosFace':
                self.loss = CosFace(in_features=dim, out_features=num_class, device_id=self.GPU_ID)

    def forward(self, img, label= None , mask = None):
        p = self.patch_size
        x = self.soft_split(img).transpose(1, 2)
        x = self.patch_to_embedding(x)
        b, n, _ = x.shape

        cls_tokens = repeat(self.cls_token, '() n d -> b n d', b = b)
        x = torch.cat((cls_tokens, x), dim=1)
        x += self.pos_embedding[:, :(n + 1)]
        x = self.dropout(x)
        x = self.transformer(x, mask)

        # y = x[:, 0]
        z = x[:, 1:].mean(dim = 1)

        # y = self.to_latent(y)
        # emb_y = self.mlp_head(y)
        z = self.to_latent(z)
        emb_z = self.mlp_head(z)
        # emb = torch.cat((emb_y, emb_z), dim=1)
        emb = emb_z
        if label is not None:
            x = self.loss(emb, label)
            return x, emb
        else:
            return emb

In [16]:
class ViT_plus(nn.Module):
    def __init__(self):
        super(ViT_plus, self).__init__()
        
        self.fc1 = nn.Linear(in_features=512, out_features=512)
        self.fc2 = nn.Linear(in_features=512, out_features=2)
        
    def forward(self, x):
        x = self.fc1(x)
        x_cosface = x
        x_classification = self.fc2(x)
        
        return x_cosface, x_classification

In [18]:
model = ViTs_face(
            loss_type='CosFace',
            GPU_ID=[device],
            num_class=93431,
            image_size=112,
            patch_size=8,
            ac_patch_size=12,
            pad=4,
            dim=512,
            depth=20,
            heads=8,
            mlp_dim=2048,
            dropout=0.1,
            emb_dropout=0.1
        ).to(device)
model.load_state_dict(
    torch.load("../Face-Transformer/results/ViT-P12S8_ms1m_cosface/Backbone_VITs_Epoch_2_Batch_12000_Time_2021-03-17-04-05_checkpoint.pth", map_location=device)
)

self.device_id [device(type='cuda', index=0)]


<All keys matched successfully>

In [19]:
for param in model.parameters():
    param.requires_grad = False

In [20]:
embeds = {}
model.eval()

with torch.no_grad():
    for img, _, file in train_data:
        img = img.to(device)
        embeds[file] = model(torch.unsqueeze(img, 0))

    for img, _, file in val_data:
        img = img.to(device)
        embeds[file] = model(torch.unsqueeze(img, 0))

In [22]:
best_accu = 0.0
def objective(trial):
    model_xtr = ViT_plus().to(device)
    
    loss_lr = trial.suggest_float("loss_learning_rate", 1e-4, 1e-2, log=True)
    arc_margin = losses.ArcFaceLoss(2, 512).to(device)
    loss_optimizer = opt.AdamW(arc_margin.parameters(), lr=loss_lr)
    
    lr = trial.suggest_float("learning_rate", 1e-5, 1e-1, log=True)
    wd = trial.suggest_float('weight_decay', 1e-4, 1e-2, log=True)
    eps = trial.suggest_float("epsilon", 1e-9, 1e-7, log=True)
    optimizer = opt.AdamW(model_xtr.parameters(), lr=lr, eps=eps, weight_decay=wd)
    
    beta = trial.suggest_float("beta", 0.1, 1.0, step=0.1)
    gamma = trial.suggest_float("gamma", 0.1, 1.0, step=0.1)
    criterion = CombinedLoss(beta=beta)
    
    batch_size = trial.suggest_int('batch_size', 50, 300)
    num_epochs = trial.suggest_int('epochs', 10, 100)
    
    print("Learning rate for Loss: "+ str(loss_lr))
    print("Learning rate: "+ str(lr))
    print("Weight decay: "+ str(wd))
    print("Epsilon: "+ str(eps))
    print("Beta: "+ str(beta))
    print("Gamma: "+ str(gamma))
    print("Batch size: "+ str(batch_size))
    print("Number of epochs: "+ str(num_epochs))
    
    for epoch in tqdm(range(num_epochs), desc="Epochs"):
        train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=4)
        val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False, num_workers=4)
        
        # training loop
        running_loss = []
        running_accu = []
        
        model_xtr.train()
        for img, pos_img, neg_img, label, img_file, pos_file, neg_file in tqdm(train_loader, desc="Training", leave=False):
            img, pos_img, neg_img, label = img.to(device), pos_img.to(device), neg_img.to(device), label.to(device)

            x1 = file_to_embed(embeds, img_file)
            x2 = file_to_embed(embeds, pos_file)
            x3 = file_to_embed(embeds, neg_file)
            
            optimizer.zero_grad()
            anchor, output = model_xtr(x1)
            pos, _ = model_xtr(x2)
            neg, _ = model_xtr(x3)
            
            pred = torch.argmax(output, 1)
            accuracy = torch.eq(pred, label).sum() / len(img)

            class_triplet_loss = criterion(anchor, pos, neg, output, label)
            arc_loss = arc_margin(anchor, label)
            loss = (gamma * arc_loss) + class_triplet_loss
            loss.backward()
            loss_optimizer.step()
            optimizer.step()

            running_accu.append(accuracy.cpu().detach().numpy())
            running_loss.append(loss.cpu().detach().numpy())
        print("Epoch: {}/{} - Loss: {:.4f} - Accuracy: {:.4f}".format(epoch+1, num_epochs, np.mean(running_loss), np.mean(running_accu)))
        
        # validation loop
        val_loss = []
        val_accu = []

        model_xtr.eval()
        with torch.no_grad():
            for img, pos_img, neg_img, label, img_file, pos_file, neg_file in tqdm(val_loader):
                img, pos_img, neg_img, label = img.to(device), pos_img.to(device), neg_img.to(device), label.to(device)
                
                x1 = file_to_embed(embeds, img_file)
                x2 = file_to_embed(embeds, pos_file)
                x3 = file_to_embed(embeds, neg_file)
                
                anchor, output = model_xtr(x1)
                pos, _ = model_xtr(x2)
                neg, _ = model_xtr(x3)
                
                pred = torch.argmax(output, 1)
                accuracy = torch.eq(pred, label).sum() / len(img)
                
                class_triplet_loss = criterion(anchor, pos, neg, output, label)
                arc_loss = arc_margin(anchor, label)
                loss = (gamma * arc_loss) + class_triplet_loss
                
                val_accu.append(accuracy.cpu().detach().numpy())
                val_loss.append(loss.cpu().detach().numpy())
        val_accu = np.mean(val_accu)
        val_loss = np.mean(val_loss)
        print("Val Loss: {:.4f} - Val Accuracy: {:.4f}".format(val_loss, val_accu))
        
        trial.report(val_accu, epoch)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()
    
    global best_accu
    if val_accu > best_accu:
        best_accu = val_accu
        print("Saving best model...")
        torch.save(model_xtr.state_dict(), "../vit_12-8_triplet_arcface_mean_only.pt")
            
    return val_accu

In [23]:
study = optuna.create_study(direction='maximize',
                            study_name='triplet-arcface-12-8-mean-only-vit-study',
                            storage='sqlite:///study.db',
                            load_if_exists=True)
study.optimize(objective, n_trials=20)

pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

# Display the study statistics
print("\nStudy statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

[32m[I 2023-12-15 02:46:50,790][0m A new study created in RDB with name: triplet-arcface-12-8-mean-only-vit-study[0m


Learning rate for Loss: 0.004111957846493814
Learning rate: 0.007183827565349839
Weight decay: 0.0002562032203757086
Epsilon: 3.2600541782161438e-09
Beta: 0.9
Gamma: 0.7000000000000001
Batch size: 253
Number of epochs: 63


Epochs:   0%|          | 0/63 [00:00<?, ?it/s]

Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 1/63 - Loss: 18.9189 - Accuracy: 0.7865


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 14.7602 - Val Accuracy: 0.8086


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 2/63 - Loss: 13.6906 - Accuracy: 0.8722


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 14.5732 - Val Accuracy: 0.8496


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 3/63 - Loss: 16.0751 - Accuracy: 0.8606


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 16.7117 - Val Accuracy: 0.9109


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 4/63 - Loss: 18.6426 - Accuracy: 0.8860


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 21.2336 - Val Accuracy: 0.8950


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 5/63 - Loss: 19.5023 - Accuracy: 0.9010


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 16.4126 - Val Accuracy: 0.9288


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 6/63 - Loss: 19.4332 - Accuracy: 0.9132


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 18.7292 - Val Accuracy: 0.9139


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 7/63 - Loss: 18.8123 - Accuracy: 0.9247


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 21.5722 - Val Accuracy: 0.9190


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 8/63 - Loss: 19.9804 - Accuracy: 0.9232


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 20.6497 - Val Accuracy: 0.9194


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 9/63 - Loss: 20.8798 - Accuracy: 0.9270


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 19.7327 - Val Accuracy: 0.9298


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 10/63 - Loss: 20.5180 - Accuracy: 0.9283


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 22.6081 - Val Accuracy: 0.9270


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 11/63 - Loss: 22.3526 - Accuracy: 0.9262


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 21.1376 - Val Accuracy: 0.9066


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 12/63 - Loss: 21.4951 - Accuracy: 0.9317


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 21.8831 - Val Accuracy: 0.9262


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 13/63 - Loss: 20.9284 - Accuracy: 0.9179


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 19.5024 - Val Accuracy: 0.9080


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 14/63 - Loss: 20.9354 - Accuracy: 0.9274


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 20.6579 - Val Accuracy: 0.9064


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 15/63 - Loss: 21.6920 - Accuracy: 0.9305


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 21.1497 - Val Accuracy: 0.9394


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 16/63 - Loss: 21.4341 - Accuracy: 0.9379


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 23.7603 - Val Accuracy: 0.9327


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 17/63 - Loss: 21.9604 - Accuracy: 0.9289


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 20.0682 - Val Accuracy: 0.9190


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 18/63 - Loss: 21.9559 - Accuracy: 0.9324


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 20.0863 - Val Accuracy: 0.9425


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 19/63 - Loss: 21.9006 - Accuracy: 0.9322


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 22.5129 - Val Accuracy: 0.9190


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 20/63 - Loss: 22.6339 - Accuracy: 0.9282


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 22.6080 - Val Accuracy: 0.9111


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 21/63 - Loss: 22.9413 - Accuracy: 0.9344


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 20.7436 - Val Accuracy: 0.9376


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 22/63 - Loss: 22.0924 - Accuracy: 0.9347


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 21.5311 - Val Accuracy: 0.9425


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 23/63 - Loss: 22.7862 - Accuracy: 0.9329


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 24.8633 - Val Accuracy: 0.9238


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 24/63 - Loss: 23.5962 - Accuracy: 0.9314


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 21.3285 - Val Accuracy: 0.9111


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 25/63 - Loss: 22.7414 - Accuracy: 0.9270


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 23.9313 - Val Accuracy: 0.9097


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 26/63 - Loss: 22.3356 - Accuracy: 0.9297


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 22.1022 - Val Accuracy: 0.9270


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 27/63 - Loss: 23.0350 - Accuracy: 0.9383


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 21.5898 - Val Accuracy: 0.9360


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 28/63 - Loss: 22.7975 - Accuracy: 0.9359


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 22.1386 - Val Accuracy: 0.9338


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 29/63 - Loss: 22.0325 - Accuracy: 0.9401


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 22.6756 - Val Accuracy: 0.9296


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 30/63 - Loss: 21.9557 - Accuracy: 0.9332


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 21.4190 - Val Accuracy: 0.9366


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 31/63 - Loss: 22.4101 - Accuracy: 0.9377


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 21.6723 - Val Accuracy: 0.8961


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 32/63 - Loss: 22.1082 - Accuracy: 0.9266


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 23.0586 - Val Accuracy: 0.8991


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 33/63 - Loss: 21.8378 - Accuracy: 0.9295


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 23.9242 - Val Accuracy: 0.9377


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 34/63 - Loss: 22.5489 - Accuracy: 0.9347


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 21.4597 - Val Accuracy: 0.9426


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 35/63 - Loss: 22.0464 - Accuracy: 0.9322


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 23.0393 - Val Accuracy: 0.9352


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 36/63 - Loss: 21.5677 - Accuracy: 0.9382


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 21.9938 - Val Accuracy: 0.9384


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 37/63 - Loss: 21.8736 - Accuracy: 0.9348


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 21.0899 - Val Accuracy: 0.9315


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 38/63 - Loss: 22.4682 - Accuracy: 0.9304


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 22.3313 - Val Accuracy: 0.9203


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 39/63 - Loss: 22.8231 - Accuracy: 0.9292


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 25.6421 - Val Accuracy: 0.9064


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 40/63 - Loss: 23.2311 - Accuracy: 0.9400


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 24.9875 - Val Accuracy: 0.9304


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 41/63 - Loss: 23.0319 - Accuracy: 0.9321


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 24.0012 - Val Accuracy: 0.9162


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 42/63 - Loss: 23.6879 - Accuracy: 0.9288


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 25.8653 - Val Accuracy: 0.8980


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 43/63 - Loss: 23.5096 - Accuracy: 0.9386


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 21.7627 - Val Accuracy: 0.9374


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 44/63 - Loss: 24.1520 - Accuracy: 0.9343


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 23.9142 - Val Accuracy: 0.9268


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 45/63 - Loss: 25.2156 - Accuracy: 0.9361


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 24.1010 - Val Accuracy: 0.9261


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 46/63 - Loss: 23.3175 - Accuracy: 0.9210


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 24.3167 - Val Accuracy: 0.9246


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 47/63 - Loss: 22.2596 - Accuracy: 0.9418


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 24.2503 - Val Accuracy: 0.9291


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 48/63 - Loss: 22.8035 - Accuracy: 0.9382


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 23.7776 - Val Accuracy: 0.9309


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 49/63 - Loss: 22.2610 - Accuracy: 0.9386


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 22.9242 - Val Accuracy: 0.9345


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 50/63 - Loss: 23.9561 - Accuracy: 0.9288


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 24.2155 - Val Accuracy: 0.9244


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 51/63 - Loss: 23.2068 - Accuracy: 0.9227


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 26.5921 - Val Accuracy: 0.8795


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 52/63 - Loss: 24.3340 - Accuracy: 0.9341


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 26.0051 - Val Accuracy: 0.9394


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 53/63 - Loss: 23.2753 - Accuracy: 0.9364


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 23.4069 - Val Accuracy: 0.9308


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 54/63 - Loss: 21.8652 - Accuracy: 0.9216


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 21.9075 - Val Accuracy: 0.9335


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 55/63 - Loss: 22.3381 - Accuracy: 0.9336


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 21.9658 - Val Accuracy: 0.9323


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 56/63 - Loss: 22.6103 - Accuracy: 0.9231


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 25.1943 - Val Accuracy: 0.9320


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 57/63 - Loss: 22.4829 - Accuracy: 0.9336


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 26.1205 - Val Accuracy: 0.9385


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 58/63 - Loss: 23.7601 - Accuracy: 0.9348


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 24.2263 - Val Accuracy: 0.9330


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 59/63 - Loss: 24.0469 - Accuracy: 0.9292


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 26.5367 - Val Accuracy: 0.9307


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 60/63 - Loss: 24.3135 - Accuracy: 0.9404


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 21.8692 - Val Accuracy: 0.9397


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 61/63 - Loss: 23.4811 - Accuracy: 0.9267


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 26.8936 - Val Accuracy: 0.9286


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 62/63 - Loss: 23.2730 - Accuracy: 0.9393


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 25.1719 - Val Accuracy: 0.9357


Training:   0%|          | 0/56 [00:00<?, ?it/s]

Epoch: 63/63 - Loss: 23.3708 - Accuracy: 0.9328


  0%|          | 0/7 [00:00<?, ?it/s]

[32m[I 2023-12-15 03:20:28,470][0m Trial 0 finished with value: 0.9354181885719299 and parameters: {'loss_learning_rate': 0.004111957846493814, 'learning_rate': 0.007183827565349839, 'weight_decay': 0.0002562032203757086, 'epsilon': 3.2600541782161438e-09, 'beta': 0.9, 'gamma': 0.7000000000000001, 'batch_size': 253, 'epochs': 63}. Best is trial 0 with value: 0.9354181885719299.[0m


Val Loss: 24.3910 - Val Accuracy: 0.9354
Saving best model...
Learning rate for Loss: 0.0002641425593565172
Learning rate: 0.00034441459865674015
Weight decay: 0.001721534113854349
Epsilon: 2.932545221358833e-08
Beta: 0.30000000000000004
Gamma: 0.7000000000000001
Batch size: 121
Number of epochs: 22


Epochs:   0%|          | 0/22 [00:00<?, ?it/s]

Training:   0%|          | 0/116 [00:00<?, ?it/s]

Epoch: 1/22 - Loss: 9.2256 - Accuracy: 0.8220


  0%|          | 0/15 [00:00<?, ?it/s]

Val Loss: 4.9475 - Val Accuracy: 0.9057


Training:   0%|          | 0/116 [00:00<?, ?it/s]

Epoch: 2/22 - Loss: 4.4804 - Accuracy: 0.9133


  0%|          | 0/15 [00:00<?, ?it/s]

Val Loss: 3.8643 - Val Accuracy: 0.9283


Training:   0%|          | 0/116 [00:00<?, ?it/s]

Epoch: 3/22 - Loss: 3.5942 - Accuracy: 0.9302


  0%|          | 0/15 [00:00<?, ?it/s]

Val Loss: 3.4807 - Val Accuracy: 0.9269


Training:   0%|          | 0/116 [00:00<?, ?it/s]

Epoch: 4/22 - Loss: 3.3287 - Accuracy: 0.9297


  0%|          | 0/15 [00:00<?, ?it/s]

Val Loss: 2.9943 - Val Accuracy: 0.9379


Training:   0%|          | 0/116 [00:00<?, ?it/s]

Epoch: 5/22 - Loss: 2.9625 - Accuracy: 0.9372


  0%|          | 0/15 [00:00<?, ?it/s]

Val Loss: 2.7968 - Val Accuracy: 0.9445


Training:   0%|          | 0/116 [00:00<?, ?it/s]

Epoch: 6/22 - Loss: 2.7465 - Accuracy: 0.9356


  0%|          | 0/15 [00:00<?, ?it/s]

Val Loss: 3.1055 - Val Accuracy: 0.9187


Training:   0%|          | 0/116 [00:00<?, ?it/s]

Epoch: 7/22 - Loss: 2.5675 - Accuracy: 0.9395


  0%|          | 0/15 [00:00<?, ?it/s]

Val Loss: 2.4305 - Val Accuracy: 0.9289


Training:   0%|          | 0/116 [00:00<?, ?it/s]

Epoch: 8/22 - Loss: 2.4042 - Accuracy: 0.9391


  0%|          | 0/15 [00:00<?, ?it/s]

Val Loss: 2.3928 - Val Accuracy: 0.9379


Training:   0%|          | 0/116 [00:00<?, ?it/s]

Epoch: 9/22 - Loss: 2.2686 - Accuracy: 0.9414


  0%|          | 0/15 [00:00<?, ?it/s]

Val Loss: 2.2850 - Val Accuracy: 0.9368


Training:   0%|          | 0/116 [00:00<?, ?it/s]

Epoch: 10/22 - Loss: 2.2079 - Accuracy: 0.9429


  0%|          | 0/15 [00:00<?, ?it/s]

Val Loss: 2.1381 - Val Accuracy: 0.9430


Training:   0%|          | 0/116 [00:00<?, ?it/s]

Epoch: 11/22 - Loss: 2.0640 - Accuracy: 0.9443


  0%|          | 0/15 [00:00<?, ?it/s]

Val Loss: 2.1065 - Val Accuracy: 0.9441


Training:   0%|          | 0/116 [00:00<?, ?it/s]

Epoch: 12/22 - Loss: 2.0362 - Accuracy: 0.9441


  0%|          | 0/15 [00:00<?, ?it/s]

Val Loss: 2.1102 - Val Accuracy: 0.9417


Training:   0%|          | 0/116 [00:00<?, ?it/s]

Epoch: 13/22 - Loss: 1.9877 - Accuracy: 0.9451


  0%|          | 0/15 [00:00<?, ?it/s]

Val Loss: 2.1300 - Val Accuracy: 0.9373


Training:   0%|          | 0/116 [00:00<?, ?it/s]

Epoch: 14/22 - Loss: 2.0088 - Accuracy: 0.9458


  0%|          | 0/15 [00:00<?, ?it/s]

Val Loss: 2.2097 - Val Accuracy: 0.9327


Training:   0%|          | 0/116 [00:00<?, ?it/s]

Epoch: 15/22 - Loss: 2.0579 - Accuracy: 0.9428


  0%|          | 0/15 [00:00<?, ?it/s]

Val Loss: 2.1984 - Val Accuracy: 0.9355


Training:   0%|          | 0/116 [00:00<?, ?it/s]

Epoch: 16/22 - Loss: 2.0929 - Accuracy: 0.9435


  0%|          | 0/15 [00:00<?, ?it/s]

Val Loss: 2.2176 - Val Accuracy: 0.9386


Training:   0%|          | 0/116 [00:00<?, ?it/s]

Epoch: 17/22 - Loss: 2.0896 - Accuracy: 0.9456


  0%|          | 0/15 [00:00<?, ?it/s]

Val Loss: 2.2497 - Val Accuracy: 0.9360


Training:   0%|          | 0/116 [00:00<?, ?it/s]

Epoch: 18/22 - Loss: 2.0774 - Accuracy: 0.9500


  0%|          | 0/15 [00:00<?, ?it/s]

Val Loss: 2.1651 - Val Accuracy: 0.9384


Training:   0%|          | 0/116 [00:00<?, ?it/s]

Epoch: 19/22 - Loss: 2.1455 - Accuracy: 0.9481


  0%|          | 0/15 [00:00<?, ?it/s]

Val Loss: 2.2993 - Val Accuracy: 0.9371


Training:   0%|          | 0/116 [00:00<?, ?it/s]

Epoch: 20/22 - Loss: 2.1261 - Accuracy: 0.9465


  0%|          | 0/15 [00:00<?, ?it/s]

Val Loss: 2.2138 - Val Accuracy: 0.9401


Training:   0%|          | 0/116 [00:00<?, ?it/s]

Epoch: 21/22 - Loss: 2.1559 - Accuracy: 0.9456


  0%|          | 0/15 [00:00<?, ?it/s]

Val Loss: 2.2188 - Val Accuracy: 0.9351


Training:   0%|          | 0/116 [00:00<?, ?it/s]

Epoch: 22/22 - Loss: 2.0519 - Accuracy: 0.9480


  0%|          | 0/15 [00:00<?, ?it/s]

[32m[I 2023-12-15 03:32:20,972][0m Trial 1 finished with value: 0.9398800134658813 and parameters: {'loss_learning_rate': 0.0002641425593565172, 'learning_rate': 0.00034441459865674015, 'weight_decay': 0.001721534113854349, 'epsilon': 2.932545221358833e-08, 'beta': 0.30000000000000004, 'gamma': 0.7000000000000001, 'batch_size': 121, 'epochs': 22}. Best is trial 1 with value: 0.9398800134658813.[0m


Val Loss: 2.0513 - Val Accuracy: 0.9399
Saving best model...
Learning rate for Loss: 0.0005036423740623761
Learning rate: 0.0006907830780341196
Weight decay: 0.0007664771900801435
Epsilon: 3.799178790770374e-09
Beta: 0.9
Gamma: 0.30000000000000004
Batch size: 234
Number of epochs: 94


Epochs:   0%|          | 0/94 [00:00<?, ?it/s]

Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 1/94 - Loss: 7.7854 - Accuracy: 0.8203


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 4.7054 - Val Accuracy: 0.9071


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 2/94 - Loss: 3.7510 - Accuracy: 0.9169


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 3.0796 - Val Accuracy: 0.9168


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 3/94 - Loss: 2.9631 - Accuracy: 0.9291


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 2.7288 - Val Accuracy: 0.9255


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 4/94 - Loss: 2.6032 - Accuracy: 0.9296


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 2.4255 - Val Accuracy: 0.9365


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 5/94 - Loss: 2.3407 - Accuracy: 0.9350


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 2.3099 - Val Accuracy: 0.9365


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 6/94 - Loss: 2.2314 - Accuracy: 0.9330


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 2.0871 - Val Accuracy: 0.9402


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 7/94 - Loss: 2.0808 - Accuracy: 0.9389


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 2.0221 - Val Accuracy: 0.9357


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 8/94 - Loss: 1.9413 - Accuracy: 0.9396


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.9374 - Val Accuracy: 0.9309


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 9/94 - Loss: 1.8798 - Accuracy: 0.9388


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.8616 - Val Accuracy: 0.9326


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 10/94 - Loss: 1.7963 - Accuracy: 0.9406


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.8292 - Val Accuracy: 0.9349


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 11/94 - Loss: 1.7554 - Accuracy: 0.9372


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.7118 - Val Accuracy: 0.9430


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 12/94 - Loss: 1.6049 - Accuracy: 0.9421


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.6823 - Val Accuracy: 0.9397


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 13/94 - Loss: 1.6096 - Accuracy: 0.9364


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.5689 - Val Accuracy: 0.9326


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 14/94 - Loss: 1.5962 - Accuracy: 0.9399


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.6188 - Val Accuracy: 0.9376


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 15/94 - Loss: 1.5690 - Accuracy: 0.9397


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.6331 - Val Accuracy: 0.9279


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 16/94 - Loss: 1.5297 - Accuracy: 0.9371


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.6470 - Val Accuracy: 0.9406


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 17/94 - Loss: 1.5132 - Accuracy: 0.9400


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.5671 - Val Accuracy: 0.9337


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 18/94 - Loss: 1.5266 - Accuracy: 0.9395


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.5122 - Val Accuracy: 0.9372


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 19/94 - Loss: 1.5539 - Accuracy: 0.9431


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.7198 - Val Accuracy: 0.9351


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 20/94 - Loss: 1.5959 - Accuracy: 0.9435


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.6287 - Val Accuracy: 0.9382


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 21/94 - Loss: 1.6171 - Accuracy: 0.9414


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.7766 - Val Accuracy: 0.9284


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 22/94 - Loss: 1.7128 - Accuracy: 0.9394


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.7565 - Val Accuracy: 0.9357


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 23/94 - Loss: 1.7058 - Accuracy: 0.9393


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.7964 - Val Accuracy: 0.9373


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 24/94 - Loss: 1.6669 - Accuracy: 0.9408


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.6569 - Val Accuracy: 0.9380


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 25/94 - Loss: 1.5988 - Accuracy: 0.9425


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.8228 - Val Accuracy: 0.9213


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 26/94 - Loss: 1.5362 - Accuracy: 0.9425


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.8138 - Val Accuracy: 0.9272


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 27/94 - Loss: 1.5365 - Accuracy: 0.9373


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.6451 - Val Accuracy: 0.9358


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 28/94 - Loss: 1.3955 - Accuracy: 0.9438


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.6704 - Val Accuracy: 0.9162


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 29/94 - Loss: 1.4650 - Accuracy: 0.9368


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.5316 - Val Accuracy: 0.9351


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 30/94 - Loss: 1.3331 - Accuracy: 0.9433


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.4652 - Val Accuracy: 0.9265


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 31/94 - Loss: 1.3367 - Accuracy: 0.9431


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.6472 - Val Accuracy: 0.9111


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 32/94 - Loss: 1.2983 - Accuracy: 0.9438


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.3881 - Val Accuracy: 0.9282


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 33/94 - Loss: 1.4114 - Accuracy: 0.9323


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.3654 - Val Accuracy: 0.9341


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 34/94 - Loss: 1.2787 - Accuracy: 0.9418


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.4856 - Val Accuracy: 0.9269


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 35/94 - Loss: 1.2598 - Accuracy: 0.9454


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.3196 - Val Accuracy: 0.9369


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 36/94 - Loss: 1.2932 - Accuracy: 0.9414


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.3296 - Val Accuracy: 0.9386


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 37/94 - Loss: 1.2256 - Accuracy: 0.9456


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.3664 - Val Accuracy: 0.9341


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 38/94 - Loss: 1.1898 - Accuracy: 0.9477


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.2952 - Val Accuracy: 0.9336


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 39/94 - Loss: 1.2410 - Accuracy: 0.9430


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.2276 - Val Accuracy: 0.9349


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 40/94 - Loss: 1.2142 - Accuracy: 0.9459


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.2204 - Val Accuracy: 0.9381


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 41/94 - Loss: 1.1679 - Accuracy: 0.9475


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.5057 - Val Accuracy: 0.9223


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 42/94 - Loss: 1.3186 - Accuracy: 0.9342


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.2398 - Val Accuracy: 0.9396


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 43/94 - Loss: 1.1397 - Accuracy: 0.9454


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.2286 - Val Accuracy: 0.9352


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 44/94 - Loss: 1.1973 - Accuracy: 0.9456


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.2086 - Val Accuracy: 0.9347


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 45/94 - Loss: 1.2809 - Accuracy: 0.9379


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.2744 - Val Accuracy: 0.9358


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 46/94 - Loss: 1.1902 - Accuracy: 0.9447


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.2498 - Val Accuracy: 0.9283


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 47/94 - Loss: 1.1570 - Accuracy: 0.9442


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.3824 - Val Accuracy: 0.9272


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 48/94 - Loss: 1.1713 - Accuracy: 0.9445


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.3615 - Val Accuracy: 0.9274


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 49/94 - Loss: 1.1988 - Accuracy: 0.9410


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.7082 - Val Accuracy: 0.9048


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 50/94 - Loss: 1.2384 - Accuracy: 0.9387


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.2509 - Val Accuracy: 0.9352


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 51/94 - Loss: 1.1231 - Accuracy: 0.9455


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.3406 - Val Accuracy: 0.9281


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 52/94 - Loss: 1.2515 - Accuracy: 0.9389


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.2404 - Val Accuracy: 0.9357


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 53/94 - Loss: 1.1598 - Accuracy: 0.9444


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.2469 - Val Accuracy: 0.9321


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 54/94 - Loss: 1.3334 - Accuracy: 0.9354


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.4355 - Val Accuracy: 0.9294


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 55/94 - Loss: 1.1054 - Accuracy: 0.9485


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.2205 - Val Accuracy: 0.9356


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 56/94 - Loss: 1.1347 - Accuracy: 0.9478


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.2186 - Val Accuracy: 0.9380


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 57/94 - Loss: 1.0967 - Accuracy: 0.9492


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.2527 - Val Accuracy: 0.9337


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 58/94 - Loss: 1.1174 - Accuracy: 0.9497


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.1967 - Val Accuracy: 0.9384


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 59/94 - Loss: 1.1618 - Accuracy: 0.9457


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.3466 - Val Accuracy: 0.9325


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 60/94 - Loss: 1.1262 - Accuracy: 0.9465


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.1019 - Val Accuracy: 0.9433


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 61/94 - Loss: 1.1245 - Accuracy: 0.9473


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.2401 - Val Accuracy: 0.9352


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 62/94 - Loss: 1.1349 - Accuracy: 0.9453


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.3835 - Val Accuracy: 0.9302


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 63/94 - Loss: 1.1161 - Accuracy: 0.9470


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.2954 - Val Accuracy: 0.9347


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 64/94 - Loss: 1.1189 - Accuracy: 0.9488


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.2166 - Val Accuracy: 0.9444


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 65/94 - Loss: 1.1370 - Accuracy: 0.9467


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.2570 - Val Accuracy: 0.9373


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 66/94 - Loss: 1.1380 - Accuracy: 0.9453


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.1501 - Val Accuracy: 0.9418


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 67/94 - Loss: 1.1542 - Accuracy: 0.9450


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.3190 - Val Accuracy: 0.9364


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 68/94 - Loss: 1.1217 - Accuracy: 0.9467


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.1926 - Val Accuracy: 0.9436


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 69/94 - Loss: 1.1020 - Accuracy: 0.9458


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.2436 - Val Accuracy: 0.9405


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 70/94 - Loss: 1.0797 - Accuracy: 0.9470


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.2504 - Val Accuracy: 0.9351


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 71/94 - Loss: 1.0732 - Accuracy: 0.9476


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.0992 - Val Accuracy: 0.9390


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 72/94 - Loss: 1.1216 - Accuracy: 0.9452


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.1909 - Val Accuracy: 0.9411


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 73/94 - Loss: 1.1105 - Accuracy: 0.9464


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.1031 - Val Accuracy: 0.9401


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 74/94 - Loss: 1.0995 - Accuracy: 0.9462


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.0712 - Val Accuracy: 0.9424


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 75/94 - Loss: 1.0977 - Accuracy: 0.9463


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.2775 - Val Accuracy: 0.9255


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 76/94 - Loss: 1.0727 - Accuracy: 0.9497


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.1457 - Val Accuracy: 0.9411


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 77/94 - Loss: 1.0840 - Accuracy: 0.9437


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.2551 - Val Accuracy: 0.9337


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 78/94 - Loss: 1.0493 - Accuracy: 0.9467


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.1441 - Val Accuracy: 0.9418


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 79/94 - Loss: 1.0464 - Accuracy: 0.9494


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.4155 - Val Accuracy: 0.9278


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 80/94 - Loss: 1.1302 - Accuracy: 0.9461


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.1584 - Val Accuracy: 0.9401


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 81/94 - Loss: 1.0634 - Accuracy: 0.9495


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.1281 - Val Accuracy: 0.9407


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 82/94 - Loss: 1.0432 - Accuracy: 0.9490


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.1314 - Val Accuracy: 0.9319


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 83/94 - Loss: 1.0411 - Accuracy: 0.9509


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.1578 - Val Accuracy: 0.9363


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 84/94 - Loss: 1.0603 - Accuracy: 0.9469


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.0855 - Val Accuracy: 0.9351


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 85/94 - Loss: 1.0537 - Accuracy: 0.9484


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.1454 - Val Accuracy: 0.9372


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 86/94 - Loss: 1.0025 - Accuracy: 0.9505


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.0745 - Val Accuracy: 0.9428


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 87/94 - Loss: 1.1005 - Accuracy: 0.9450


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.2386 - Val Accuracy: 0.9335


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 88/94 - Loss: 1.0956 - Accuracy: 0.9452


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.2085 - Val Accuracy: 0.9368


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 89/94 - Loss: 1.0516 - Accuracy: 0.9494


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.0571 - Val Accuracy: 0.9402


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 90/94 - Loss: 1.0637 - Accuracy: 0.9470


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.2161 - Val Accuracy: 0.9337


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 91/94 - Loss: 1.0645 - Accuracy: 0.9473


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.1588 - Val Accuracy: 0.9341


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 92/94 - Loss: 1.0655 - Accuracy: 0.9482


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.1814 - Val Accuracy: 0.9346


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 93/94 - Loss: 1.0293 - Accuracy: 0.9505


  0%|          | 0/8 [00:00<?, ?it/s]

Val Loss: 1.1478 - Val Accuracy: 0.9368


Training:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch: 94/94 - Loss: 1.0566 - Accuracy: 0.9500


  0%|          | 0/8 [00:00<?, ?it/s]

[32m[I 2023-12-15 04:20:58,261][0m Trial 2 finished with value: 0.937235414981842 and parameters: {'loss_learning_rate': 0.0005036423740623761, 'learning_rate': 0.0006907830780341196, 'weight_decay': 0.0007664771900801435, 'epsilon': 3.799178790770374e-09, 'beta': 0.9, 'gamma': 0.30000000000000004, 'batch_size': 234, 'epochs': 94}. Best is trial 1 with value: 0.9398800134658813.[0m


Val Loss: 1.1690 - Val Accuracy: 0.9372
Learning rate for Loss: 0.00020145171409572714
Learning rate: 0.0012024279958608084
Weight decay: 0.0023429739554198924
Epsilon: 1.0822252578243318e-09
Beta: 0.5
Gamma: 0.6
Batch size: 53
Number of epochs: 93


Epochs:   0%|          | 0/93 [00:00<?, ?it/s]

Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 1/93 - Loss: 6.0758 - Accuracy: 0.8832


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 5.0123 - Val Accuracy: 0.8906


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 2/93 - Loss: 3.9710 - Accuracy: 0.9177


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 3.7191 - Val Accuracy: 0.9145


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 3/93 - Loss: 3.4480 - Accuracy: 0.9275


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 3.4831 - Val Accuracy: 0.9225


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 4/93 - Loss: 3.0667 - Accuracy: 0.9310


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 3.3532 - Val Accuracy: 0.9082


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 5/93 - Loss: 2.7801 - Accuracy: 0.9337


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.7018 - Val Accuracy: 0.9295


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 6/93 - Loss: 2.6452 - Accuracy: 0.9323


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.5015 - Val Accuracy: 0.9393


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 7/93 - Loss: 2.6436 - Accuracy: 0.9304


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.2871 - Val Accuracy: 0.9364


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 8/93 - Loss: 2.5093 - Accuracy: 0.9357


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.5201 - Val Accuracy: 0.9410


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 9/93 - Loss: 2.6114 - Accuracy: 0.9357


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.8788 - Val Accuracy: 0.9278


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 10/93 - Loss: 2.6547 - Accuracy: 0.9395


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.5635 - Val Accuracy: 0.9370


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 11/93 - Loss: 2.8032 - Accuracy: 0.9381


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 3.0325 - Val Accuracy: 0.9284


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 12/93 - Loss: 2.6813 - Accuracy: 0.9411


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.4939 - Val Accuracy: 0.9387


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 13/93 - Loss: 2.6827 - Accuracy: 0.9394


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.4406 - Val Accuracy: 0.9353


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 14/93 - Loss: 2.7188 - Accuracy: 0.9386


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.4072 - Val Accuracy: 0.9410


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 15/93 - Loss: 2.5944 - Accuracy: 0.9433


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.7517 - Val Accuracy: 0.9426


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 16/93 - Loss: 2.5658 - Accuracy: 0.9459


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.6034 - Val Accuracy: 0.9381


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 17/93 - Loss: 2.6214 - Accuracy: 0.9428


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.5034 - Val Accuracy: 0.9301


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 18/93 - Loss: 2.5273 - Accuracy: 0.9425


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.4370 - Val Accuracy: 0.9369


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 19/93 - Loss: 2.4236 - Accuracy: 0.9415


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.3214 - Val Accuracy: 0.9386


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 20/93 - Loss: 2.3616 - Accuracy: 0.9438


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.3262 - Val Accuracy: 0.9410


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 21/93 - Loss: 2.3343 - Accuracy: 0.9411


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.3991 - Val Accuracy: 0.9370


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 22/93 - Loss: 2.2906 - Accuracy: 0.9447


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.4818 - Val Accuracy: 0.9307


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 23/93 - Loss: 2.3223 - Accuracy: 0.9424


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.2847 - Val Accuracy: 0.9375


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 24/93 - Loss: 2.2674 - Accuracy: 0.9422


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.3749 - Val Accuracy: 0.9404


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 25/93 - Loss: 2.2605 - Accuracy: 0.9416


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.8977 - Val Accuracy: 0.9186


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 26/93 - Loss: 2.3022 - Accuracy: 0.9409


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.5027 - Val Accuracy: 0.9347


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 27/93 - Loss: 2.3118 - Accuracy: 0.9421


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.4894 - Val Accuracy: 0.9283


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 28/93 - Loss: 2.1317 - Accuracy: 0.9441


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.1815 - Val Accuracy: 0.9422


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 29/93 - Loss: 2.1644 - Accuracy: 0.9423


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.4743 - Val Accuracy: 0.9433


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 30/93 - Loss: 2.2216 - Accuracy: 0.9432


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.3806 - Val Accuracy: 0.9342


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 31/93 - Loss: 2.3058 - Accuracy: 0.9440


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 1.9758 - Val Accuracy: 0.9415


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 32/93 - Loss: 2.2440 - Accuracy: 0.9424


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.3930 - Val Accuracy: 0.9404


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 33/93 - Loss: 2.2196 - Accuracy: 0.9407


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.3383 - Val Accuracy: 0.9341


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 34/93 - Loss: 2.2150 - Accuracy: 0.9438


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.4612 - Val Accuracy: 0.9330


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 35/93 - Loss: 2.0961 - Accuracy: 0.9458


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.2720 - Val Accuracy: 0.9283


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 36/93 - Loss: 2.2287 - Accuracy: 0.9430


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.5557 - Val Accuracy: 0.9370


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 37/93 - Loss: 2.2167 - Accuracy: 0.9458


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.2999 - Val Accuracy: 0.9294


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 38/93 - Loss: 2.1359 - Accuracy: 0.9442


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.7965 - Val Accuracy: 0.9353


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 39/93 - Loss: 2.2677 - Accuracy: 0.9440


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.6951 - Val Accuracy: 0.9353


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 40/93 - Loss: 2.2399 - Accuracy: 0.9445


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.2969 - Val Accuracy: 0.9335


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 41/93 - Loss: 2.2587 - Accuracy: 0.9435


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.4635 - Val Accuracy: 0.9335


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 42/93 - Loss: 2.1857 - Accuracy: 0.9454


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.2554 - Val Accuracy: 0.9369


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 43/93 - Loss: 2.2971 - Accuracy: 0.9405


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.3140 - Val Accuracy: 0.9375


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 44/93 - Loss: 2.1554 - Accuracy: 0.9446


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.3038 - Val Accuracy: 0.9381


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 45/93 - Loss: 2.1212 - Accuracy: 0.9458


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.5703 - Val Accuracy: 0.9330


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 46/93 - Loss: 2.1190 - Accuracy: 0.9465


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.1769 - Val Accuracy: 0.9254


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 47/93 - Loss: 2.0973 - Accuracy: 0.9445


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.1469 - Val Accuracy: 0.9346


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 48/93 - Loss: 2.1616 - Accuracy: 0.9430


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.2242 - Val Accuracy: 0.9347


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 49/93 - Loss: 2.1117 - Accuracy: 0.9445


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.5832 - Val Accuracy: 0.9306


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 50/93 - Loss: 2.1408 - Accuracy: 0.9435


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.2701 - Val Accuracy: 0.9381


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 51/93 - Loss: 2.1658 - Accuracy: 0.9451


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.2209 - Val Accuracy: 0.9335


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 52/93 - Loss: 2.2144 - Accuracy: 0.9415


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.3020 - Val Accuracy: 0.9444


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 53/93 - Loss: 2.1850 - Accuracy: 0.9445


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.2432 - Val Accuracy: 0.9399


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 54/93 - Loss: 2.1584 - Accuracy: 0.9423


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.1642 - Val Accuracy: 0.9381


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 55/93 - Loss: 2.1751 - Accuracy: 0.9409


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.1129 - Val Accuracy: 0.9398


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 56/93 - Loss: 2.1594 - Accuracy: 0.9467


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.3511 - Val Accuracy: 0.9301


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 57/93 - Loss: 2.1065 - Accuracy: 0.9456


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.3797 - Val Accuracy: 0.9255


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 58/93 - Loss: 2.1055 - Accuracy: 0.9434


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.6055 - Val Accuracy: 0.9324


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 59/93 - Loss: 2.1639 - Accuracy: 0.9431


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.2922 - Val Accuracy: 0.9381


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 60/93 - Loss: 2.1745 - Accuracy: 0.9450


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.0481 - Val Accuracy: 0.9392


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 61/93 - Loss: 2.1595 - Accuracy: 0.9462


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.0995 - Val Accuracy: 0.9392


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 62/93 - Loss: 2.1737 - Accuracy: 0.9468


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.1970 - Val Accuracy: 0.9295


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 63/93 - Loss: 2.1807 - Accuracy: 0.9435


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.2868 - Val Accuracy: 0.9300


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 64/93 - Loss: 2.1026 - Accuracy: 0.9453


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 3.1708 - Val Accuracy: 0.9416


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 65/93 - Loss: 2.1319 - Accuracy: 0.9462


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.3553 - Val Accuracy: 0.9335


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 66/93 - Loss: 2.0939 - Accuracy: 0.9462


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.0092 - Val Accuracy: 0.9421


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 67/93 - Loss: 2.1653 - Accuracy: 0.9438


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.4783 - Val Accuracy: 0.9398


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 68/93 - Loss: 2.1660 - Accuracy: 0.9435


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.1181 - Val Accuracy: 0.9364


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 69/93 - Loss: 2.2010 - Accuracy: 0.9435


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.4309 - Val Accuracy: 0.9249


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 70/93 - Loss: 2.1754 - Accuracy: 0.9434


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.0408 - Val Accuracy: 0.9421


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 71/93 - Loss: 2.1675 - Accuracy: 0.9441


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.0406 - Val Accuracy: 0.9341


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 72/93 - Loss: 2.2264 - Accuracy: 0.9414


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.5265 - Val Accuracy: 0.9295


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 73/93 - Loss: 2.1080 - Accuracy: 0.9466


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.4079 - Val Accuracy: 0.9427


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 74/93 - Loss: 2.2136 - Accuracy: 0.9428


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.4185 - Val Accuracy: 0.9312


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 75/93 - Loss: 2.1000 - Accuracy: 0.9484


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.4020 - Val Accuracy: 0.9306


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 76/93 - Loss: 2.1757 - Accuracy: 0.9449


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.6904 - Val Accuracy: 0.9301


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 77/93 - Loss: 2.1617 - Accuracy: 0.9439


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.1701 - Val Accuracy: 0.9392


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 78/93 - Loss: 2.0886 - Accuracy: 0.9450


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.2145 - Val Accuracy: 0.9301


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 79/93 - Loss: 2.1678 - Accuracy: 0.9439


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.1049 - Val Accuracy: 0.9398


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 80/93 - Loss: 2.1719 - Accuracy: 0.9447


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.2243 - Val Accuracy: 0.9318


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 81/93 - Loss: 2.1079 - Accuracy: 0.9477


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.3374 - Val Accuracy: 0.9290


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 82/93 - Loss: 2.1693 - Accuracy: 0.9444


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.2535 - Val Accuracy: 0.9334


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 83/93 - Loss: 2.1580 - Accuracy: 0.9439


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.0721 - Val Accuracy: 0.9375


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 84/93 - Loss: 2.1445 - Accuracy: 0.9430


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.2706 - Val Accuracy: 0.9341


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 85/93 - Loss: 2.1848 - Accuracy: 0.9453


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.0287 - Val Accuracy: 0.9353


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 86/93 - Loss: 2.1465 - Accuracy: 0.9431


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.3145 - Val Accuracy: 0.9381


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 87/93 - Loss: 2.1764 - Accuracy: 0.9442


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.4653 - Val Accuracy: 0.9266


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 88/93 - Loss: 2.1401 - Accuracy: 0.9435


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.2366 - Val Accuracy: 0.9387


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 89/93 - Loss: 2.1664 - Accuracy: 0.9450


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.3024 - Val Accuracy: 0.9352


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 90/93 - Loss: 2.1617 - Accuracy: 0.9452


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.2282 - Val Accuracy: 0.9363


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 91/93 - Loss: 2.2305 - Accuracy: 0.9431


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.4523 - Val Accuracy: 0.9300


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 92/93 - Loss: 2.1790 - Accuracy: 0.9452


  0%|          | 0/33 [00:00<?, ?it/s]

Val Loss: 2.3429 - Val Accuracy: 0.9335


Training:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch: 93/93 - Loss: 2.1502 - Accuracy: 0.9441


  0%|          | 0/33 [00:00<?, ?it/s]

[32m[I 2023-12-15 05:08:47,167][0m Trial 3 finished with value: 0.9357767105102539 and parameters: {'loss_learning_rate': 0.00020145171409572714, 'learning_rate': 0.0012024279958608084, 'weight_decay': 0.0023429739554198924, 'epsilon': 1.0822252578243318e-09, 'beta': 0.5, 'gamma': 0.6, 'batch_size': 53, 'epochs': 93}. Best is trial 1 with value: 0.9398800134658813.[0m


Val Loss: 2.1076 - Val Accuracy: 0.9358
Learning rate for Loss: 0.0005091177917035578
Learning rate: 1.1508165543242902e-05
Weight decay: 0.0025650012866077383
Epsilon: 4.769776651541793e-08
Beta: 0.8
Gamma: 0.30000000000000004
Batch size: 154
Number of epochs: 76


Epochs:   0%|          | 0/76 [00:00<?, ?it/s]

Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 1/76 - Loss: 15.7343 - Accuracy: 0.5320


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 14.3112 - Val Accuracy: 0.6080


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 2/76 - Loss: 13.4763 - Accuracy: 0.6145


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 12.8549 - Val Accuracy: 0.6616


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 3/76 - Loss: 12.2196 - Accuracy: 0.6661


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 11.9160 - Val Accuracy: 0.7092


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 4/76 - Loss: 11.2297 - Accuracy: 0.7184


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 10.5337 - Val Accuracy: 0.7618


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 5/76 - Loss: 9.9831 - Accuracy: 0.7617


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 9.4749 - Val Accuracy: 0.7926


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 6/76 - Loss: 8.9629 - Accuracy: 0.7873


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 8.4692 - Val Accuracy: 0.8170


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 7/76 - Loss: 8.0520 - Accuracy: 0.8105


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 7.6806 - Val Accuracy: 0.8338


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 8/76 - Loss: 7.4227 - Accuracy: 0.8257


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 6.9230 - Val Accuracy: 0.8517


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 9/76 - Loss: 6.9268 - Accuracy: 0.8364


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 6.5203 - Val Accuracy: 0.8614


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 10/76 - Loss: 6.4112 - Accuracy: 0.8446


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 6.4235 - Val Accuracy: 0.8739


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 11/76 - Loss: 5.9979 - Accuracy: 0.8520


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 5.6110 - Val Accuracy: 0.8739


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 12/76 - Loss: 5.6857 - Accuracy: 0.8568


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 5.3969 - Val Accuracy: 0.8874


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 13/76 - Loss: 5.3201 - Accuracy: 0.8654


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 5.0972 - Val Accuracy: 0.8836


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 14/76 - Loss: 5.0629 - Accuracy: 0.8700


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.9987 - Val Accuracy: 0.8934


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 15/76 - Loss: 4.8978 - Accuracy: 0.8756


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.7364 - Val Accuracy: 0.8955


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 16/76 - Loss: 4.6883 - Accuracy: 0.8797


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.4709 - Val Accuracy: 0.8988


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 17/76 - Loss: 4.5576 - Accuracy: 0.8823


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.6626 - Val Accuracy: 0.8993


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 18/76 - Loss: 4.5814 - Accuracy: 0.8845


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.6794 - Val Accuracy: 0.8971


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 19/76 - Loss: 4.8562 - Accuracy: 0.8868


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.9462 - Val Accuracy: 0.8982


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 20/76 - Loss: 4.9534 - Accuracy: 0.8905


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 5.0196 - Val Accuracy: 0.8993


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 21/76 - Loss: 5.2255 - Accuracy: 0.8914


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 5.1254 - Val Accuracy: 0.9020


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 22/76 - Loss: 5.2663 - Accuracy: 0.8947


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 5.2056 - Val Accuracy: 0.9026


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 23/76 - Loss: 5.2560 - Accuracy: 0.8958


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 5.0977 - Val Accuracy: 0.9031


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 24/76 - Loss: 5.2434 - Accuracy: 0.8974


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 5.1286 - Val Accuracy: 0.9042


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 25/76 - Loss: 5.2057 - Accuracy: 0.8998


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 5.0403 - Val Accuracy: 0.9063


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 26/76 - Loss: 5.1730 - Accuracy: 0.9017


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 5.0976 - Val Accuracy: 0.9085


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 27/76 - Loss: 5.0721 - Accuracy: 0.9027


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.8919 - Val Accuracy: 0.9107


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 28/76 - Loss: 4.9546 - Accuracy: 0.9043


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.9340 - Val Accuracy: 0.9101


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 29/76 - Loss: 4.8736 - Accuracy: 0.9066


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.7085 - Val Accuracy: 0.9096


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 30/76 - Loss: 4.7624 - Accuracy: 0.9068


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.5672 - Val Accuracy: 0.9112


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 31/76 - Loss: 4.6562 - Accuracy: 0.9082


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.6731 - Val Accuracy: 0.9112


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 32/76 - Loss: 4.5322 - Accuracy: 0.9101


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.5101 - Val Accuracy: 0.9107


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 33/76 - Loss: 4.4888 - Accuracy: 0.9106


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.3648 - Val Accuracy: 0.9139


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 34/76 - Loss: 4.4112 - Accuracy: 0.9119


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.3663 - Val Accuracy: 0.9128


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 35/76 - Loss: 4.3277 - Accuracy: 0.9124


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.2270 - Val Accuracy: 0.9123


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 36/76 - Loss: 4.2326 - Accuracy: 0.9139


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.2546 - Val Accuracy: 0.9107


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 37/76 - Loss: 4.1953 - Accuracy: 0.9152


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.1967 - Val Accuracy: 0.9150


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 38/76 - Loss: 4.1232 - Accuracy: 0.9150


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.0801 - Val Accuracy: 0.9134


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 39/76 - Loss: 4.1036 - Accuracy: 0.9172


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.1409 - Val Accuracy: 0.9155


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 40/76 - Loss: 4.0194 - Accuracy: 0.9184


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.0420 - Val Accuracy: 0.9172


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 41/76 - Loss: 3.9571 - Accuracy: 0.9179


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 3.9007 - Val Accuracy: 0.9139


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 42/76 - Loss: 3.9179 - Accuracy: 0.9178


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.0569 - Val Accuracy: 0.9144


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 43/76 - Loss: 3.9133 - Accuracy: 0.9208


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 3.9648 - Val Accuracy: 0.9248


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 44/76 - Loss: 3.9599 - Accuracy: 0.9206


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 3.9526 - Val Accuracy: 0.9199


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 45/76 - Loss: 3.8703 - Accuracy: 0.9206


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 3.9382 - Val Accuracy: 0.9172


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 46/76 - Loss: 3.8485 - Accuracy: 0.9213


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 3.9018 - Val Accuracy: 0.9139


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 47/76 - Loss: 3.8339 - Accuracy: 0.9230


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 3.8028 - Val Accuracy: 0.9182


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 48/76 - Loss: 3.8739 - Accuracy: 0.9224


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 3.8716 - Val Accuracy: 0.9215


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 49/76 - Loss: 3.7936 - Accuracy: 0.9228


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 3.7462 - Val Accuracy: 0.9161


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 50/76 - Loss: 3.7785 - Accuracy: 0.9246


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 3.9148 - Val Accuracy: 0.9139


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 51/76 - Loss: 3.7475 - Accuracy: 0.9242


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 3.7232 - Val Accuracy: 0.9242


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 52/76 - Loss: 3.7801 - Accuracy: 0.9250


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 3.7028 - Val Accuracy: 0.9264


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 53/76 - Loss: 3.7472 - Accuracy: 0.9257


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 3.7929 - Val Accuracy: 0.9258


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 54/76 - Loss: 3.7314 - Accuracy: 0.9259


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 3.6466 - Val Accuracy: 0.9231


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 55/76 - Loss: 3.7225 - Accuracy: 0.9267


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 3.6860 - Val Accuracy: 0.9285


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 56/76 - Loss: 3.7298 - Accuracy: 0.9270


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 3.7259 - Val Accuracy: 0.9231


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 57/76 - Loss: 3.6849 - Accuracy: 0.9271


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 3.7232 - Val Accuracy: 0.9269


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 58/76 - Loss: 3.6671 - Accuracy: 0.9280


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 3.6221 - Val Accuracy: 0.9253


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 59/76 - Loss: 3.6722 - Accuracy: 0.9279


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 3.7494 - Val Accuracy: 0.9242


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 60/76 - Loss: 3.6121 - Accuracy: 0.9293


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 3.6960 - Val Accuracy: 0.9258


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 61/76 - Loss: 3.5816 - Accuracy: 0.9291


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 3.6153 - Val Accuracy: 0.9269


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 62/76 - Loss: 3.5695 - Accuracy: 0.9313


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 3.5424 - Val Accuracy: 0.9258


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 63/76 - Loss: 3.5842 - Accuracy: 0.9299


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 3.5741 - Val Accuracy: 0.9269


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 64/76 - Loss: 3.5207 - Accuracy: 0.9306


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 3.4944 - Val Accuracy: 0.9296


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 65/76 - Loss: 3.4929 - Accuracy: 0.9304


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 3.5227 - Val Accuracy: 0.9264


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 66/76 - Loss: 3.5172 - Accuracy: 0.9314


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 3.5041 - Val Accuracy: 0.9269


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 67/76 - Loss: 3.4822 - Accuracy: 0.9322


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 3.5363 - Val Accuracy: 0.9258


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 68/76 - Loss: 3.4507 - Accuracy: 0.9330


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 3.4124 - Val Accuracy: 0.9247


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 69/76 - Loss: 3.4097 - Accuracy: 0.9326


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 3.2686 - Val Accuracy: 0.9274


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 70/76 - Loss: 3.3895 - Accuracy: 0.9332


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 3.4304 - Val Accuracy: 0.9274


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 71/76 - Loss: 3.3752 - Accuracy: 0.9342


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 3.3316 - Val Accuracy: 0.9296


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 72/76 - Loss: 3.3663 - Accuracy: 0.9337


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 3.2314 - Val Accuracy: 0.9269


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 73/76 - Loss: 3.3342 - Accuracy: 0.9341


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 3.3226 - Val Accuracy: 0.9280


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 74/76 - Loss: 3.2991 - Accuracy: 0.9339


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 3.3180 - Val Accuracy: 0.9296


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 75/76 - Loss: 3.3463 - Accuracy: 0.9350


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 3.2819 - Val Accuracy: 0.9312


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Epoch: 76/76 - Loss: 3.2992 - Accuracy: 0.9349


  0%|          | 0/12 [00:00<?, ?it/s]

[32m[I 2023-12-15 05:48:33,055][0m Trial 4 finished with value: 0.9301523566246033 and parameters: {'loss_learning_rate': 0.0005091177917035578, 'learning_rate': 1.1508165543242902e-05, 'weight_decay': 0.0025650012866077383, 'epsilon': 4.769776651541793e-08, 'beta': 0.8, 'gamma': 0.30000000000000004, 'batch_size': 154, 'epochs': 76}. Best is trial 1 with value: 0.9398800134658813.[0m


Val Loss: 3.3440 - Val Accuracy: 0.9302
Learning rate for Loss: 0.0029510522200859313
Learning rate: 0.040623467297495094
Weight decay: 0.006574385591759983
Epsilon: 7.562986209672438e-09
Beta: 0.9
Gamma: 0.2
Batch size: 88
Number of epochs: 52


Epochs:   0%|          | 0/52 [00:00<?, ?it/s]

Training:   0%|          | 0/159 [00:00<?, ?it/s]

Epoch: 1/52 - Loss: 800.8986 - Accuracy: 0.8259


  0%|          | 0/20 [00:00<?, ?it/s]

[32m[I 2023-12-15 05:49:03,642][0m Trial 5 pruned. [0m


Val Loss: 1122.3689 - Val Accuracy: 0.8805
Learning rate for Loss: 0.0034413494291320634
Learning rate: 0.011596616417116035
Weight decay: 0.0037796081832779404
Epsilon: 7.637927176184173e-09
Beta: 0.5
Gamma: 0.6
Batch size: 108
Number of epochs: 50


Epochs:   0%|          | 0/50 [00:00<?, ?it/s]

Training:   0%|          | 0/130 [00:00<?, ?it/s]

Epoch: 1/50 - Loss: 32.2582 - Accuracy: 0.7834


  0%|          | 0/17 [00:00<?, ?it/s]

[32m[I 2023-12-15 05:49:34,501][0m Trial 6 pruned. [0m


Val Loss: 29.7702 - Val Accuracy: 0.8567
Learning rate for Loss: 0.0034402347715347613
Learning rate: 0.050968052309708925
Weight decay: 0.0001648375313305286
Epsilon: 9.154631834218333e-09
Beta: 0.8
Gamma: 0.30000000000000004
Batch size: 87
Number of epochs: 58


Epochs:   0%|          | 0/58 [00:00<?, ?it/s]

Training:   0%|          | 0/161 [00:00<?, ?it/s]

Epoch: 1/58 - Loss: 999.7747 - Accuracy: 0.8288


  0%|          | 0/21 [00:00<?, ?it/s]

Val Loss: 1562.6871 - Val Accuracy: 0.9048


Training:   0%|          | 0/161 [00:00<?, ?it/s]

Epoch: 2/58 - Loss: 1683.6489 - Accuracy: 0.8874


  0%|          | 0/21 [00:00<?, ?it/s]

[32m[I 2023-12-15 05:50:35,959][0m Trial 7 pruned. [0m


Val Loss: 1686.9940 - Val Accuracy: 0.9075
Learning rate for Loss: 0.0011814487459279781
Learning rate: 1.1551703912512678e-05
Weight decay: 0.006506814220264782
Epsilon: 1.5136014294739561e-09
Beta: 0.1
Gamma: 1.0
Batch size: 131
Number of epochs: 44


Epochs:   0%|          | 0/44 [00:00<?, ?it/s]

Training:   0%|          | 0/107 [00:00<?, ?it/s]

Epoch: 1/44 - Loss: 28.9931 - Accuracy: 0.5887


  0%|          | 0/14 [00:00<?, ?it/s]

[32m[I 2023-12-15 05:51:07,129][0m Trial 8 pruned. [0m


Val Loss: 25.0583 - Val Accuracy: 0.6036
Learning rate for Loss: 0.0001604275234379164
Learning rate: 0.0011009593653653477
Weight decay: 0.007787046924352808
Epsilon: 9.197975330985514e-09
Beta: 0.6
Gamma: 0.6
Batch size: 197
Number of epochs: 12


Epochs:   0%|          | 0/12 [00:00<?, ?it/s]

Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 1/12 - Loss: 8.9734 - Accuracy: 0.8537


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 4.8411 - Val Accuracy: 0.9054


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 2/12 - Loss: 4.2505 - Accuracy: 0.9208


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 3.6570 - Val Accuracy: 0.9239


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 3/12 - Loss: 3.6365 - Accuracy: 0.9260


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 3.2456 - Val Accuracy: 0.9247


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 4/12 - Loss: 3.1667 - Accuracy: 0.9374


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.8208 - Val Accuracy: 0.9349


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 5/12 - Loss: 3.1294 - Accuracy: 0.9293


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 3.1919 - Val Accuracy: 0.9189


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 6/12 - Loss: 3.0266 - Accuracy: 0.9302


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 3.0080 - Val Accuracy: 0.9305


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 7/12 - Loss: 2.7364 - Accuracy: 0.9395


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.5138 - Val Accuracy: 0.9356


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 8/12 - Loss: 2.7180 - Accuracy: 0.9381


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.8279 - Val Accuracy: 0.9328


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 9/12 - Loss: 2.5954 - Accuracy: 0.9388


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.7326 - Val Accuracy: 0.9333


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 10/12 - Loss: 2.3873 - Accuracy: 0.9449


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.5378 - Val Accuracy: 0.9339


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 11/12 - Loss: 2.4048 - Accuracy: 0.9418


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 2.2772 - Val Accuracy: 0.9355


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 12/12 - Loss: 2.3566 - Accuracy: 0.9427


  0%|          | 0/9 [00:00<?, ?it/s]

[32m[I 2023-12-15 05:57:24,259][0m Trial 9 finished with value: 0.9407815933227539 and parameters: {'loss_learning_rate': 0.0001604275234379164, 'learning_rate': 0.0011009593653653477, 'weight_decay': 0.007787046924352808, 'epsilon': 9.197975330985514e-09, 'beta': 0.6, 'gamma': 0.6, 'batch_size': 197, 'epochs': 12}. Best is trial 9 with value: 0.9407815933227539.[0m


Val Loss: 2.2999 - Val Accuracy: 0.9408
Saving best model...
Learning rate for Loss: 0.009690324542031335
Learning rate: 0.00017510386245021606
Weight decay: 0.0006228056549203043
Epsilon: 9.7493105273897e-08
Beta: 0.6
Gamma: 1.0
Batch size: 206
Number of epochs: 10


Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 1/10 - Loss: 18.7769 - Accuracy: 0.6758


  0%|          | 0/9 [00:00<?, ?it/s]

[32m[I 2023-12-15 05:57:55,055][0m Trial 10 pruned. [0m


Val Loss: 10.7827 - Val Accuracy: 0.8534
Learning rate for Loss: 0.00011079082049702739
Learning rate: 0.00012945418719134108
Weight decay: 0.009571449098551644
Epsilon: 2.8107292919381603e-08
Beta: 0.2
Gamma: 0.8
Batch size: 187
Number of epochs: 10


Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Training:   0%|          | 0/75 [00:00<?, ?it/s]

Epoch: 1/10 - Loss: 16.5906 - Accuracy: 0.6812


  0%|          | 0/10 [00:00<?, ?it/s]

[32m[I 2023-12-15 05:58:26,517][0m Trial 11 pruned. [0m


Val Loss: 11.2161 - Val Accuracy: 0.8186
Learning rate for Loss: 0.00022606777198880975
Learning rate: 0.0011076282946795878
Weight decay: 0.0014221077901498563
Epsilon: 2.754403049068034e-08
Beta: 0.30000000000000004
Gamma: 0.8
Batch size: 165
Number of epochs: 26


Epochs:   0%|          | 0/26 [00:00<?, ?it/s]

Training:   0%|          | 0/85 [00:00<?, ?it/s]

Epoch: 1/26 - Loss: 8.1883 - Accuracy: 0.8501


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 4.5339 - Val Accuracy: 0.9096


Training:   0%|          | 0/85 [00:00<?, ?it/s]

Epoch: 2/26 - Loss: 4.1250 - Accuracy: 0.9251


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 3.6894 - Val Accuracy: 0.9285


Training:   0%|          | 0/85 [00:00<?, ?it/s]

Epoch: 3/26 - Loss: 3.4859 - Accuracy: 0.9305


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 4.4112 - Val Accuracy: 0.8864


Training:   0%|          | 0/85 [00:00<?, ?it/s]

Epoch: 4/26 - Loss: 3.3682 - Accuracy: 0.9285


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 3.5842 - Val Accuracy: 0.9196


Training:   0%|          | 0/85 [00:00<?, ?it/s]

Epoch: 5/26 - Loss: 3.0766 - Accuracy: 0.9338


  0%|          | 0/11 [00:00<?, ?it/s]

[32m[I 2023-12-15 06:01:01,195][0m Trial 12 pruned. [0m


Val Loss: 3.1868 - Val Accuracy: 0.9224
Learning rate for Loss: 0.00011292303100734567
Learning rate: 0.00015413023560240728
Weight decay: 0.00043958181661288005
Epsilon: 2.0172071399563927e-08
Beta: 0.4
Gamma: 0.5
Batch size: 275
Number of epochs: 30


Epochs:   0%|          | 0/30 [00:00<?, ?it/s]

Training:   0%|          | 0/51 [00:00<?, ?it/s]

Epoch: 1/30 - Loss: 12.6550 - Accuracy: 0.6606


  0%|          | 0/7 [00:00<?, ?it/s]

[32m[I 2023-12-15 06:01:33,123][0m Trial 13 pruned. [0m


Val Loss: 8.8108 - Val Accuracy: 0.8058
Learning rate for Loss: 0.0003521949619561741
Learning rate: 0.003491220123509423
Weight decay: 0.0013925287932739575
Epsilon: 1.6233162463456852e-08
Beta: 0.7000000000000001
Gamma: 0.5
Batch size: 211
Number of epochs: 27


Epochs:   0%|          | 0/27 [00:00<?, ?it/s]

Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 1/27 - Loss: 9.2262 - Accuracy: 0.8438


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 5.7868 - Val Accuracy: 0.9262


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 2/27 - Loss: 5.7394 - Accuracy: 0.9176


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 6.7140 - Val Accuracy: 0.8566


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 3/27 - Loss: 4.9977 - Accuracy: 0.9212


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 4.5888 - Val Accuracy: 0.9378


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 4/27 - Loss: 4.6342 - Accuracy: 0.9276


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 4.0421 - Val Accuracy: 0.9290


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 5/27 - Loss: 4.2978 - Accuracy: 0.9322


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 4.1951 - Val Accuracy: 0.9185


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 6/27 - Loss: 4.3030 - Accuracy: 0.9230


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 4.2043 - Val Accuracy: 0.8953


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 7/27 - Loss: 4.0765 - Accuracy: 0.9345


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 4.0752 - Val Accuracy: 0.9210


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 8/27 - Loss: 3.9138 - Accuracy: 0.9368


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 3.7692 - Val Accuracy: 0.9398


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 9/27 - Loss: 3.8456 - Accuracy: 0.9311


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 3.7716 - Val Accuracy: 0.9325


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 10/27 - Loss: 3.6366 - Accuracy: 0.9341


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 3.6447 - Val Accuracy: 0.9316


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 11/27 - Loss: 3.5801 - Accuracy: 0.9355


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 3.4694 - Val Accuracy: 0.9301


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 12/27 - Loss: 3.5469 - Accuracy: 0.9389


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 3.5799 - Val Accuracy: 0.9289


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 13/27 - Loss: 3.6007 - Accuracy: 0.9361


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 3.8934 - Val Accuracy: 0.9303


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 14/27 - Loss: 3.6211 - Accuracy: 0.9365


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 4.1492 - Val Accuracy: 0.9086


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 15/27 - Loss: 3.5902 - Accuracy: 0.9294


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 3.6800 - Val Accuracy: 0.9341


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 16/27 - Loss: 3.5574 - Accuracy: 0.9380


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 3.5028 - Val Accuracy: 0.9318


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 17/27 - Loss: 3.5373 - Accuracy: 0.9381


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 3.8416 - Val Accuracy: 0.9237


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 18/27 - Loss: 3.8659 - Accuracy: 0.9367


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 4.5315 - Val Accuracy: 0.9301


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 19/27 - Loss: 4.4613 - Accuracy: 0.9374


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 4.7432 - Val Accuracy: 0.9237


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 20/27 - Loss: 5.0932 - Accuracy: 0.9320


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 5.5011 - Val Accuracy: 0.9295


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 21/27 - Loss: 5.7229 - Accuracy: 0.9314


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 5.7070 - Val Accuracy: 0.9176


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 22/27 - Loss: 6.1956 - Accuracy: 0.9282


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 7.2138 - Val Accuracy: 0.9332


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 23/27 - Loss: 7.4241 - Accuracy: 0.9297


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 7.7475 - Val Accuracy: 0.9280


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 24/27 - Loss: 8.2668 - Accuracy: 0.9347


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 9.4644 - Val Accuracy: 0.9320


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 25/27 - Loss: 9.1678 - Accuracy: 0.9242


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 10.6212 - Val Accuracy: 0.9118


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 26/27 - Loss: 9.5429 - Accuracy: 0.9258


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 9.4432 - Val Accuracy: 0.8744


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 27/27 - Loss: 9.3494 - Accuracy: 0.9283


  0%|          | 0/9 [00:00<?, ?it/s]

[32m[I 2023-12-15 06:15:37,451][0m Trial 14 finished with value: 0.9288546442985535 and parameters: {'loss_learning_rate': 0.0003521949619561741, 'learning_rate': 0.003491220123509423, 'weight_decay': 0.0013925287932739575, 'epsilon': 1.6233162463456852e-08, 'beta': 0.7000000000000001, 'gamma': 0.5, 'batch_size': 211, 'epochs': 27}. Best is trial 9 with value: 0.9407815933227539.[0m


Val Loss: 9.8846 - Val Accuracy: 0.9289
Learning rate for Loss: 0.0012461178931656581
Learning rate: 5.9014251513328354e-05
Weight decay: 0.0040923289831324835
Epsilon: 5.581713500702314e-08
Beta: 0.30000000000000004
Gamma: 0.8
Batch size: 138
Number of epochs: 38


Epochs:   0%|          | 0/38 [00:00<?, ?it/s]

Training:   0%|          | 0/102 [00:00<?, ?it/s]

Epoch: 1/38 - Loss: 18.6229 - Accuracy: 0.6277


  0%|          | 0/13 [00:00<?, ?it/s]

[32m[I 2023-12-15 06:16:08,742][0m Trial 15 pruned. [0m


Val Loss: 13.0821 - Val Accuracy: 0.7463
Learning rate for Loss: 0.00020802853306648198
Learning rate: 0.00037930231166035934
Weight decay: 0.00010140751261874564
Epsilon: 4.5197130435801855e-09
Beta: 0.6
Gamma: 0.4
Batch size: 294
Number of epochs: 19


Epochs:   0%|          | 0/19 [00:00<?, ?it/s]

Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 1/19 - Loss: 9.9511 - Accuracy: 0.7880


  0%|          | 0/6 [00:00<?, ?it/s]

[32m[I 2023-12-15 06:16:40,606][0m Trial 16 pruned. [0m


Val Loss: 6.4421 - Val Accuracy: 0.8797
Learning rate for Loss: 0.0006715089324586991
Learning rate: 4.642770498324161e-05
Weight decay: 0.0013707912805161124
Epsilon: 1.3771588366562356e-08
Beta: 0.4
Gamma: 0.1
Batch size: 184
Number of epochs: 19


Epochs:   0%|          | 0/19 [00:00<?, ?it/s]

Training:   0%|          | 0/76 [00:00<?, ?it/s]

Epoch: 1/19 - Loss: 5.9754 - Accuracy: 0.5615


  0%|          | 0/10 [00:00<?, ?it/s]

[32m[I 2023-12-15 06:17:12,128][0m Trial 17 pruned. [0m


Val Loss: 5.0627 - Val Accuracy: 0.6652
Learning rate for Loss: 0.00029351983491643
Learning rate: 0.0027208749554250293
Weight decay: 0.0024992154899082953
Epsilon: 4.436055839981491e-08
Beta: 0.1
Gamma: 0.7000000000000001
Batch size: 116
Number of epochs: 37


Epochs:   0%|          | 0/37 [00:00<?, ?it/s]

Training:   0%|          | 0/121 [00:00<?, ?it/s]

Epoch: 1/37 - Loss: 6.4251 - Accuracy: 0.8390


  0%|          | 0/16 [00:00<?, ?it/s]

[32m[I 2023-12-15 06:17:42,759][0m Trial 18 pruned. [0m


Val Loss: 3.9365 - Val Accuracy: 0.8998
Learning rate for Loss: 0.00014471502506216872
Learning rate: 0.00037692656333805245
Weight decay: 0.009615895087903113
Epsilon: 2.364131482327514e-09
Beta: 0.30000000000000004
Gamma: 0.9
Batch size: 56
Number of epochs: 67


Epochs:   0%|          | 0/67 [00:00<?, ?it/s]

Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 1/67 - Loss: 8.9137 - Accuracy: 0.8578


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 5.3671 - Val Accuracy: 0.9079


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 2/67 - Loss: 4.7701 - Accuracy: 0.9194


  0%|          | 0/32 [00:00<?, ?it/s]

[32m[I 2023-12-15 06:18:43,947][0m Trial 19 pruned. [0m


Val Loss: 4.2882 - Val Accuracy: 0.9138

Study statistics: 
  Number of finished trials:  20
  Number of pruned trials:  13
  Number of complete trials:  7


In [24]:
print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

Best trial:
  Value:  0.9407815933227539
  Params: 
    batch_size: 197
    beta: 0.6
    epochs: 12
    epsilon: 9.197975330985514e-09
    gamma: 0.6
    learning_rate: 0.0011009593653653477
    loss_learning_rate: 0.0001604275234379164
    weight_decay: 0.007787046924352808


In [None]:
# ViT P12-S8 Triplet ArcFace Mean

Best trial:
Value:  0.9407815933227539
Params: 
batch_size: 197
beta: 0.6
epochs: 12
epsilon: 9.197975330985514e-09
gamma: 0.6
learning_rate: 0.0011009593653653477
loss_learning_rate: 0.0001604275234379164
weight_decay: 0.007787046924352808