In [1]:
import os
import random
import pandas as pd
import numpy as np
import mxnet as mx
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as opt
from torch.utils.data import Dataset, DataLoader
from pytorch_metric_learning import losses
from einops import rearrange, repeat
import optuna
from optuna.trial import TrialState
from tqdm.notebook import tqdm

In [2]:
def file_to_embed(embeds, file):
    emb = []
    for f in file:
        emb.append(embeds[f][0])
    return torch.stack(emb)

In [3]:
MIN_NUM_PATCHES = 16

In [4]:
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=1)

In [5]:
class AdienceDataset(Dataset):
    def __init__(self, annot_file, img_dir, train=False):
        self.img_lbls = pd.read_csv(annot_file, header=None)
        self.img_dir = img_dir
        self.is_train = train
    
    def __len__(self):
        return len(self.img_lbls)
    
    def __getitem__(self, idx):
        img_file = self.img_lbls.iloc[idx, 0]
        img_path = os.path.join(self.img_dir, img_file)
        image = mx.image.imread(img_path)
        if image.shape[1] != 112:
            image = mx.image.resize_short(image, 112)
        image = mx.nd.transpose(image, axes=(2,0,1))
        image = torch.tensor(image.asnumpy()).type(torch.FloatTensor)
        label = self.img_lbls.iloc[idx, 1]
        
        if self.is_train:
            positive_list = self.img_lbls[self.img_lbls.iloc[:, 1] == label].index.values
            positive_list = np.setdiff1d(positive_list, np.array([idx]))
            positive_item = random.choice(positive_list)
            positive_img = self.img_lbls.iloc[positive_item, 0]
            pos_img_path = os.path.join(self.img_dir, positive_img)
            pos_image = mx.image.imread(pos_img_path)
            if pos_image.shape[1] != 112:
                pos_image = mx.image.resize_short(pos_image, 112)
            pos_image = mx.nd.transpose(pos_image, axes=(2,0,1))
            pos_image = torch.tensor(pos_image.asnumpy()).type(torch.FloatTensor)
            
            negative_list = self.img_lbls[self.img_lbls.iloc[:, 1] != label].index.values
            negative_item = random.choice(negative_list)
            negative_img = self.img_lbls.iloc[negative_item, 0]
            neg_img_path = os.path.join(self.img_dir, negative_img)
            neg_image = mx.image.imread(neg_img_path)
            if neg_image.shape[1] != 112:
                neg_image = mx.image.resize_short(neg_image, 112)
            neg_image = mx.nd.transpose(neg_image, axes=(2,0,1))
            neg_image = torch.tensor(neg_image.asnumpy()).type(torch.FloatTensor)
            
            return image, pos_image, neg_image, label, img_file, positive_img, negative_img

        return image, label, img_file

In [6]:
train_data = AdienceDataset("../train.csv", "../cropped_Adience/", train=False)
val_data = AdienceDataset("../val.csv", "../cropped_Adience/", train=False)

In [20]:
train_data = AdienceDataset("../train.csv", "../cropped_Adience/", train=True)
val_data = AdienceDataset("../val.csv", "../cropped_Adience/", train=True)

In [7]:
class TripletLoss(nn.Module):
    def __init__(self, margin=1.0):
        super(TripletLoss, self).__init__()
        self.margin = margin
        
    def calc_euclidean(self, x1, x2):
        return (x1 - x2).pow(2).sum(1)
    
    def forward(self, anchor, positive, negative):
        distance_positive = self.calc_euclidean(anchor, positive)
        distance_negative = self.calc_euclidean(anchor, negative)
        losses = torch.relu(distance_positive - distance_negative + self.margin)

        return losses.mean()

In [8]:
class CombinedLoss(nn.Module):
    def __init__(self, beta=1.0):
        super(CombinedLoss, self).__init__()
        self.beta = beta
        self.triplet = TripletLoss(margin=1.0)
        self.classification = nn.CrossEntropyLoss()
        
    def forward(self, anchor, positive, negative, classification_out, labels):
        triplet_loss = self.triplet(anchor, positive, negative)
        classification_loss = self.classification(classification_out, labels)
        total_loss = (self.beta * triplet_loss) + classification_loss
        
        return total_loss

In [9]:
class CosFace(nn.Module):
    r"""Implement of CosFace (https://arxiv.org/pdf/1801.09414.pdf):
    Args:
        in_features: size of each input sample
        out_features: size of each output sample
        device_id: the ID of GPU where the model will be trained by model parallel.
                       if device_id=None, it will be trained on CPU without model parallel.
        s: norm of input feature
        m: margin
        cos(theta)-m
    """

    def __init__(self, in_features, out_features, device_id, s=64.0, m=0.35):
        super(CosFace, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.device_id = device_id
        self.s = s
        self.m = m
        print("self.device_id", self.device_id)
        self.weight = nn.Parameter(torch.FloatTensor(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)

    def forward(self, input, label):
        # --------------------------- cos(theta) & phi(theta) ---------------------------

        if self.device_id == None:
            cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        else:
            x = input
            sub_weights = torch.chunk(self.weight, len(self.device_id), dim=0)
            temp_x = x.cuda(self.device_id[0])
            weight = sub_weights[0].cuda(self.device_id[0])
            cosine = F.linear(F.normalize(temp_x), F.normalize(weight))
            for i in range(1, len(self.device_id)):
                temp_x = x.cuda(self.device_id[i])
                weight = sub_weights[i].cuda(self.device_id[i])
                cosine = torch.cat((cosine, F.linear(F.normalize(temp_x), F.normalize(weight)).cuda(self.device_id[0])),
                                   dim=1)
        phi = cosine - self.m
        # --------------------------- convert label to one-hot ---------------------------
        one_hot = torch.zeros(cosine.size())
        if self.device_id != None:
            one_hot = one_hot.cuda(self.device_id[0])
        # one_hot = one_hot.cuda() if cosine.is_cuda else one_hot

        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        # -------------torch.where(out_i = {x_i if condition_i else y_i) -------------
        output = (one_hot * phi) + (
                    (1.0 - one_hot) * cosine)  # you can use torch.where if your torch.__version__ is 0.4
        output *= self.s

        return output

    def __repr__(self):
        return self.__class__.__name__ + '(' \
               + 'in_features = ' + str(self.in_features) \
               + ', out_features = ' + str(self.out_features) \
               + ', s = ' + str(self.s) \
               + ', m = ' + str(self.m) + ')'

In [10]:
class Residual(nn.Module):
    def __init__(self, fn):
        super().__init__()
        self.fn = fn
    def forward(self, x, **kwargs):
        return self.fn(x, **kwargs) + x

In [11]:
class PreNorm(nn.Module):
    def __init__(self, dim, fn):
        super().__init__()
        self.norm = nn.LayerNorm(dim)
        self.fn = fn
    def forward(self, x, **kwargs):
        return self.fn(self.norm(x), **kwargs)

In [12]:
class FeedForward(nn.Module):
    def __init__(self, dim, hidden_dim, dropout = 0.):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(dim, hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, dim),
            nn.Dropout(dropout)
        )
    def forward(self, x):
        return self.net(x)

In [13]:
class Attention(nn.Module):
    def __init__(self, dim, heads = 8, dim_head = 64, dropout = 0.):
        super().__init__()
        inner_dim = dim_head *  heads
        self.heads = heads
        self.scale = dim ** -0.5

        self.to_qkv = nn.Linear(dim, inner_dim * 3, bias = False)
        self.to_out = nn.Sequential(
            nn.Linear(inner_dim, dim),
            nn.Dropout(dropout)
        )

    def forward(self, x, mask = None):
        b, n, _, h = *x.shape, self.heads
        qkv = self.to_qkv(x).chunk(3, dim = -1)

        q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> b h n d', h = h), qkv)
        dots = torch.einsum('bhid,bhjd->bhij', q, k) * self.scale
        mask_value = -torch.finfo(dots.dtype).max
        #embed()
        if mask is not None:
            mask = F.pad(mask.flatten(1), (1, 0), value = True)
            assert mask.shape[-1] == dots.shape[-1], 'mask has incorrect dimensions'
            mask = mask[:, None, :] * mask[:, :, None]
            dots.masked_fill_(~mask, mask_value)
            del mask

        attn = dots.softmax(dim=-1)

        out = torch.einsum('bhij,bhjd->bhid', attn, v)
        out = rearrange(out, 'b h n d -> b n (h d)')
        out =  self.to_out(out)

        return out

In [14]:
class Transformer(nn.Module):
    def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout):
        super().__init__()
        self.layers = nn.ModuleList([])
        for _ in range(depth):
            self.layers.append(nn.ModuleList([
                Residual(PreNorm(dim, Attention(dim, heads = heads, dim_head = dim_head, dropout = dropout))),
                Residual(PreNorm(dim, FeedForward(dim, mlp_dim, dropout = dropout)))
            ]))
    def forward(self, x, mask = None):
        for attn, ff in self.layers:
            x = attn(x, mask = mask)
            #embed()
            x = ff(x)
        return x

In [15]:
class ViTs_face(nn.Module):
    def __init__(self, *, loss_type, GPU_ID, num_class, image_size, patch_size, ac_patch_size,
                         pad, dim, depth, heads, mlp_dim, pool = 'mean', channels = 3, dim_head = 64, dropout = 0., emb_dropout = 0.):
        super().__init__()
        assert image_size % patch_size == 0, 'Image dimensions must be divisible by the patch size.'
        num_patches = (image_size // patch_size) ** 2
        patch_dim = channels * ac_patch_size ** 2
        assert num_patches > MIN_NUM_PATCHES, f'your number of patches ({num_patches}) is way too small for attention to be effective (at least 16). Try decreasing your patch size'
        assert pool in {'cls', 'mean'}, 'pool type must be either cls (cls token) or mean (mean pooling)'

        self.patch_size = patch_size
        self.soft_split = nn.Unfold(kernel_size=(ac_patch_size, ac_patch_size), stride=(self.patch_size, self.patch_size), padding=(pad, pad))


        self.pos_embedding = nn.Parameter(torch.randn(1, num_patches + 1, dim))
        self.patch_to_embedding = nn.Linear(patch_dim, dim)
        self.cls_token = nn.Parameter(torch.randn(1, 1, dim))
        self.dropout = nn.Dropout(emb_dropout)

        self.transformer = Transformer(dim, depth, heads, dim_head, mlp_dim, dropout)

        self.pool = pool
        self.to_latent = nn.Identity()

        self.mlp_head = nn.Sequential(
            nn.LayerNorm(dim),
        )
        self.loss_type = loss_type
        self.GPU_ID = GPU_ID
        if self.loss_type == 'None':
            print("no loss for vit_face")
        else:
            if self.loss_type == 'CosFace':
                self.loss = CosFace(in_features=dim, out_features=num_class, device_id=self.GPU_ID)

    def forward(self, img, label= None , mask = None):
        p = self.patch_size
        x = self.soft_split(img).transpose(1, 2)
        x = self.patch_to_embedding(x)
        b, n, _ = x.shape

        cls_tokens = repeat(self.cls_token, '() n d -> b n d', b = b)
        x = torch.cat((cls_tokens, x), dim=1)
        x += self.pos_embedding[:, :(n + 1)]
        x = self.dropout(x)
        x = self.transformer(x, mask)

        # y = x[:, 0]
        z = x[:, 1:].mean(dim = 1)

        # y = self.to_latent(y)
        # emb_y = self.mlp_head(y)
        z = self.to_latent(z)
        emb_z = self.mlp_head(z)
        # emb = torch.cat((emb_y, emb_z), dim=1)
        emb = emb_z
        if label is not None:
            x = self.loss(emb, label)
            return x, emb
        else:
            return emb

In [16]:
class ViT_plus(nn.Module):
    def __init__(self):
        super(ViT_plus, self).__init__()
        
        self.fc1 = nn.Linear(in_features=512, out_features=512)
        self.fc2 = nn.Linear(in_features=512, out_features=2)
        
    def forward(self, x):
        x = self.fc1(x)
        x_cosface = x
        x_classification = self.fc2(x)
        
        return x_cosface, x_classification

In [17]:
model = ViTs_face(
            loss_type='CosFace',
            GPU_ID=[device],
            num_class=93431,
            image_size=112,
            patch_size=8,
            ac_patch_size=12,
            pad=4,
            dim=512,
            depth=20,
            heads=8,
            mlp_dim=2048,
            dropout=0.1,
            emb_dropout=0.1
        ).to(device)
model.load_state_dict(
    torch.load("../Face-Transformer/results/ViT-P12S8_ms1m_cosface/Backbone_VITs_Epoch_2_Batch_12000_Time_2021-03-17-04-05_checkpoint.pth", map_location=device)
)

self.device_id [device(type='cuda', index=1)]


<All keys matched successfully>

In [18]:
for param in model.parameters():
    param.requires_grad = False

In [19]:
embeds = {}
model.eval()

with torch.no_grad():
    for img, _, file in train_data:
        img = img.to(device)
        embeds[file] = model(torch.unsqueeze(img, 0))

    for img, _, file in val_data:
        img = img.to(device)
        embeds[file] = model(torch.unsqueeze(img, 0))

In [21]:
best_accu = 0.0
def objective(trial):
    model_xtr = ViT_plus().to(device)
    
    loss_lr = trial.suggest_float("loss_learning_rate", 1e-4, 1e-2, log=True)
    arc_margin = losses.ArcFaceLoss(2, 512).to(device)
    cos_margin = losses.CosFaceLoss(2, 512).to(device)
    loss_optimizer_1 = opt.AdamW(arc_margin.parameters(), lr=loss_lr)
    loss_optimizer_2 = opt.AdamW(cos_margin.parameters(), lr=loss_lr)
    
    lr = trial.suggest_float("learning_rate", 1e-5, 1e-1, log=True)
    wd = trial.suggest_float('weight_decay', 1e-4, 1e-2, log=True)
    eps = trial.suggest_float("epsilon", 1e-9, 1e-7, log=True)
    optimizer = opt.AdamW(model_xtr.parameters(), lr=lr, eps=eps, weight_decay=wd)
    
    beta = trial.suggest_float("beta", 0.1, 1.0, step=0.1)
    gamma = trial.suggest_float("gamma", 0.1, 1.0, step=0.1)
    theta = trial.suggest_float("theta", 0.1, 1.0, step=0.1)
    criterion = CombinedLoss(beta=beta)
    
    batch_size = trial.suggest_int('batch_size', 50, 300)
    num_epochs = trial.suggest_int('epochs', 10, 100)
    
    print("Learning rate for Loss: "+ str(loss_lr))
    print("Learning rate: "+ str(lr))
    print("Weight decay: "+ str(wd))
    print("Epsilon: "+ str(eps))
    print("Beta: "+ str(beta))
    print("Gamma: "+ str(gamma))
    print("Theta: "+ str(theta))
    print("Batch size: "+ str(batch_size))
    print("Number of epochs: "+ str(num_epochs))
    
    for epoch in tqdm(range(num_epochs), desc="Epochs"):
        train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=4)
        val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False, num_workers=4)
        
        # training loop
        running_loss = []
        running_accu = []
        
        model_xtr.train()
        for img, pos_img, neg_img, label, img_file, pos_file, neg_file in tqdm(train_loader, desc="Training", leave=False):
            img, pos_img, neg_img, label = img.to(device), pos_img.to(device), neg_img.to(device), label.to(device)

            x1 = file_to_embed(embeds, img_file)
            x2 = file_to_embed(embeds, pos_file)
            x3 = file_to_embed(embeds, neg_file)
            
            optimizer.zero_grad()
            anchor, output = model_xtr(x1)
            pos, _ = model_xtr(x2)
            neg, _ = model_xtr(x3)
            
            pred = torch.argmax(output, 1)
            accuracy = torch.eq(pred, label).sum() / len(img)

            class_triplet_loss = criterion(anchor, pos, neg, output, label)
            arc_loss = arc_margin(anchor, label)
            cos_loss = cos_margin(anchor, label)
            loss = (theta * cos_loss) + (gamma * arc_loss) + class_triplet_loss
            loss.backward()
            loss_optimizer_1.step()
            loss_optimizer_2.step()
            optimizer.step()

            running_accu.append(accuracy.cpu().detach().numpy())
            running_loss.append(loss.cpu().detach().numpy())
        print("Epoch: {}/{} - Loss: {:.4f} - Accuracy: {:.4f}".format(epoch+1, num_epochs, np.mean(running_loss), np.mean(running_accu)))
        
        # validation loop
        val_loss = []
        val_accu = []

        model_xtr.eval()
        with torch.no_grad():
            for img, pos_img, neg_img, label, img_file, pos_file, neg_file in tqdm(val_loader):
                img, pos_img, neg_img, label = img.to(device), pos_img.to(device), neg_img.to(device), label.to(device)
                
                x1 = file_to_embed(embeds, img_file)
                x2 = file_to_embed(embeds, pos_file)
                x3 = file_to_embed(embeds, neg_file)
                
                anchor, output = model_xtr(x1)
                pos, _ = model_xtr(x2)
                neg, _ = model_xtr(x3)
                
                pred = torch.argmax(output, 1)
                accuracy = torch.eq(pred, label).sum() / len(img)
                
                class_triplet_loss = criterion(anchor, pos, neg, output, label)
                arc_loss = arc_margin(anchor, label)
                cos_loss = cos_margin(anchor, label)
                loss = (theta * cos_loss) + (gamma * arc_loss) + class_triplet_loss
                
                val_accu.append(accuracy.cpu().detach().numpy())
                val_loss.append(loss.cpu().detach().numpy())
        val_accu = np.mean(val_accu)
        val_loss = np.mean(val_loss)
        print("Val Loss: {:.4f} - Val Accuracy: {:.4f}".format(val_loss, val_accu))
        
        trial.report(val_accu, epoch)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()
    
    global best_accu
    if val_accu > best_accu:
        best_accu = val_accu
        print("Saving best model...")
        torch.save(model_xtr.state_dict(), "../vit_12-8_triplet_arcface_cosface_mean_only.pt")
            
    return val_accu

In [22]:
study = optuna.create_study(direction='maximize',
                            study_name='triplet-arcface-cosface-12-8-mean-only-vit-study',
                            storage='sqlite:///study1.db',
                            load_if_exists=True)
study.optimize(objective, n_trials=20)

pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

# Display the study statistics
print("\nStudy statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

[32m[I 2023-12-15 02:47:06,365][0m A new study created in RDB with name: triplet-arcface-cosface-12-8-mean-only-vit-study[0m


Learning rate for Loss: 0.00036175611374582095
Learning rate: 0.011027952677634808
Weight decay: 0.008778170739808407
Epsilon: 7.895725329894206e-08
Beta: 0.9
Gamma: 1.0
Theta: 0.4
Batch size: 209
Number of epochs: 59


Epochs:   0%|          | 0/59 [00:00<?, ?it/s]

Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 1/59 - Loss: 36.9503 - Accuracy: 0.8181


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 29.5063 - Val Accuracy: 0.9212


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 2/59 - Loss: 31.6541 - Accuracy: 0.9080


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 30.7918 - Val Accuracy: 0.9249


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 3/59 - Loss: 37.3133 - Accuracy: 0.8994


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 42.4022 - Val Accuracy: 0.9054


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 4/59 - Loss: 42.5039 - Accuracy: 0.9126


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 43.5089 - Val Accuracy: 0.9274


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 5/59 - Loss: 43.0397 - Accuracy: 0.9216


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 48.2069 - Val Accuracy: 0.9070


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 6/59 - Loss: 42.7882 - Accuracy: 0.9122


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 44.0487 - Val Accuracy: 0.9313


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 7/59 - Loss: 45.5703 - Accuracy: 0.9215


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 43.6009 - Val Accuracy: 0.8788


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 8/59 - Loss: 45.0401 - Accuracy: 0.9165


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 50.7942 - Val Accuracy: 0.9248


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 9/59 - Loss: 46.5641 - Accuracy: 0.9293


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 48.2944 - Val Accuracy: 0.9175


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 10/59 - Loss: 48.1524 - Accuracy: 0.9274


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 54.0943 - Val Accuracy: 0.9297


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 11/59 - Loss: 46.4941 - Accuracy: 0.9337


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 53.2857 - Val Accuracy: 0.9339


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 12/59 - Loss: 50.8118 - Accuracy: 0.9255


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 60.3440 - Val Accuracy: 0.9338


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 13/59 - Loss: 46.1030 - Accuracy: 0.9297


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 52.2277 - Val Accuracy: 0.9190


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 14/59 - Loss: 48.3922 - Accuracy: 0.9218


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 45.2608 - Val Accuracy: 0.9222


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 15/59 - Loss: 45.9253 - Accuracy: 0.9300


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 47.2306 - Val Accuracy: 0.9338


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 16/59 - Loss: 47.3977 - Accuracy: 0.9225


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 50.3159 - Val Accuracy: 0.9281


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 17/59 - Loss: 46.2824 - Accuracy: 0.9326


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 47.7836 - Val Accuracy: 0.9160


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 18/59 - Loss: 47.4281 - Accuracy: 0.9250


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 45.5535 - Val Accuracy: 0.8989


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 19/59 - Loss: 45.0028 - Accuracy: 0.9240


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 45.1890 - Val Accuracy: 0.9239


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 20/59 - Loss: 46.7645 - Accuracy: 0.9230


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 52.4186 - Val Accuracy: 0.9259


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 21/59 - Loss: 51.7093 - Accuracy: 0.9294


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 49.4102 - Val Accuracy: 0.9195


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 22/59 - Loss: 47.7949 - Accuracy: 0.9169


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 53.3541 - Val Accuracy: 0.8985


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 23/59 - Loss: 51.2215 - Accuracy: 0.9109


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 52.2005 - Val Accuracy: 0.9170


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 24/59 - Loss: 47.0977 - Accuracy: 0.9313


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 45.2004 - Val Accuracy: 0.9344


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 25/59 - Loss: 50.2304 - Accuracy: 0.9298


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 49.0632 - Val Accuracy: 0.9070


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 26/59 - Loss: 46.5092 - Accuracy: 0.9238


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 52.3975 - Val Accuracy: 0.9354


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 27/59 - Loss: 50.5063 - Accuracy: 0.9013


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 55.7689 - Val Accuracy: 0.9354


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 28/59 - Loss: 50.1246 - Accuracy: 0.9260


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 52.4525 - Val Accuracy: 0.9391


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 29/59 - Loss: 50.9619 - Accuracy: 0.9281


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 46.0539 - Val Accuracy: 0.9150


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 30/59 - Loss: 48.9272 - Accuracy: 0.9302


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 57.4520 - Val Accuracy: 0.9291


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 31/59 - Loss: 49.5332 - Accuracy: 0.9327


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 47.1981 - Val Accuracy: 0.9417


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 32/59 - Loss: 48.9518 - Accuracy: 0.9310


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 50.7665 - Val Accuracy: 0.9249


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 33/59 - Loss: 49.4878 - Accuracy: 0.9308


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 54.2132 - Val Accuracy: 0.9222


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 34/59 - Loss: 46.6160 - Accuracy: 0.9283


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 48.6722 - Val Accuracy: 0.9054


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 35/59 - Loss: 48.9948 - Accuracy: 0.9253


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 47.5005 - Val Accuracy: 0.9334


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 36/59 - Loss: 50.2322 - Accuracy: 0.9328


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 55.9741 - Val Accuracy: 0.9206


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 37/59 - Loss: 50.8121 - Accuracy: 0.9134


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 49.8798 - Val Accuracy: 0.9133


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 38/59 - Loss: 49.0131 - Accuracy: 0.9321


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 45.8813 - Val Accuracy: 0.9364


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 39/59 - Loss: 46.9368 - Accuracy: 0.9288


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 51.9344 - Val Accuracy: 0.8864


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 40/59 - Loss: 48.0514 - Accuracy: 0.9128


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 52.8019 - Val Accuracy: 0.9275


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 41/59 - Loss: 49.7434 - Accuracy: 0.9354


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 57.9263 - Val Accuracy: 0.9335


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 42/59 - Loss: 48.2599 - Accuracy: 0.9353


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 53.7967 - Val Accuracy: 0.9333


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 43/59 - Loss: 53.3344 - Accuracy: 0.9308


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 51.6445 - Val Accuracy: 0.9205


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 44/59 - Loss: 53.4416 - Accuracy: 0.9332


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 57.1383 - Val Accuracy: 0.9360


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 45/59 - Loss: 52.5409 - Accuracy: 0.9305


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 55.6629 - Val Accuracy: 0.9339


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 46/59 - Loss: 52.2690 - Accuracy: 0.9364


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 52.3488 - Val Accuracy: 0.9281


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 47/59 - Loss: 48.6465 - Accuracy: 0.9364


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 57.4127 - Val Accuracy: 0.9275


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 48/59 - Loss: 52.5613 - Accuracy: 0.9354


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 67.3143 - Val Accuracy: 0.9189


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 49/59 - Loss: 51.6143 - Accuracy: 0.9333


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 56.7104 - Val Accuracy: 0.9254


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 50/59 - Loss: 50.0874 - Accuracy: 0.9346


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 49.5810 - Val Accuracy: 0.9345


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 51/59 - Loss: 51.2985 - Accuracy: 0.9329


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 56.4563 - Val Accuracy: 0.9311


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 52/59 - Loss: 51.9394 - Accuracy: 0.9211


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 52.2496 - Val Accuracy: 0.9175


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 53/59 - Loss: 47.6876 - Accuracy: 0.9244


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 48.4523 - Val Accuracy: 0.9265


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 54/59 - Loss: 49.7357 - Accuracy: 0.9293


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 52.0852 - Val Accuracy: 0.9308


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 55/59 - Loss: 49.2668 - Accuracy: 0.9330


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 46.0817 - Val Accuracy: 0.9291


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 56/59 - Loss: 48.8796 - Accuracy: 0.9306


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 52.2195 - Val Accuracy: 0.9369


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 57/59 - Loss: 48.3506 - Accuracy: 0.9319


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 43.9097 - Val Accuracy: 0.9365


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 58/59 - Loss: 47.3874 - Accuracy: 0.9301


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 48.2812 - Val Accuracy: 0.9102


Training:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch: 59/59 - Loss: 48.1943 - Accuracy: 0.9240


  0%|          | 0/9 [00:00<?, ?it/s]

[32m[I 2023-12-15 03:17:57,646][0m Trial 0 finished with value: 0.9207649230957031 and parameters: {'loss_learning_rate': 0.00036175611374582095, 'learning_rate': 0.011027952677634808, 'weight_decay': 0.008778170739808407, 'epsilon': 7.895725329894206e-08, 'beta': 0.9, 'gamma': 1.0, 'theta': 0.4, 'batch_size': 209, 'epochs': 59}. Best is trial 0 with value: 0.9207649230957031.[0m


Val Loss: 59.4514 - Val Accuracy: 0.9208
Saving best model...
Learning rate for Loss: 0.002924319432683996
Learning rate: 8.968383341493482e-05
Weight decay: 0.00032283286559799346
Epsilon: 6.595985880937434e-09
Beta: 0.6
Gamma: 0.30000000000000004
Theta: 0.6
Batch size: 159
Number of epochs: 24


Epochs:   0%|          | 0/24 [00:00<?, ?it/s]

Training:   0%|          | 0/88 [00:00<?, ?it/s]

Epoch: 1/24 - Loss: 21.0052 - Accuracy: 0.6482


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 13.6454 - Val Accuracy: 0.8190


Training:   0%|          | 0/88 [00:00<?, ?it/s]

Epoch: 2/24 - Loss: 10.6641 - Accuracy: 0.8571


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 8.2349 - Val Accuracy: 0.8883


Training:   0%|          | 0/88 [00:00<?, ?it/s]

Epoch: 3/24 - Loss: 7.5569 - Accuracy: 0.8972


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 6.6018 - Val Accuracy: 0.9095


Training:   0%|          | 0/88 [00:00<?, ?it/s]

Epoch: 4/24 - Loss: 6.2884 - Accuracy: 0.9150


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 5.8472 - Val Accuracy: 0.9169


Training:   0%|          | 0/88 [00:00<?, ?it/s]

Epoch: 5/24 - Loss: 5.7494 - Accuracy: 0.9223


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 5.5633 - Val Accuracy: 0.9209


Training:   0%|          | 0/88 [00:00<?, ?it/s]

Epoch: 6/24 - Loss: 5.3326 - Accuracy: 0.9288


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 5.1694 - Val Accuracy: 0.9238


Training:   0%|          | 0/88 [00:00<?, ?it/s]

Epoch: 7/24 - Loss: 5.1585 - Accuracy: 0.9302


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 5.0882 - Val Accuracy: 0.9255


Training:   0%|          | 0/88 [00:00<?, ?it/s]

Epoch: 8/24 - Loss: 4.9073 - Accuracy: 0.9341


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 4.8890 - Val Accuracy: 0.9284


Training:   0%|          | 0/88 [00:00<?, ?it/s]

Epoch: 9/24 - Loss: 4.7397 - Accuracy: 0.9357


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 4.8373 - Val Accuracy: 0.9295


Training:   0%|          | 0/88 [00:00<?, ?it/s]

Epoch: 10/24 - Loss: 4.5672 - Accuracy: 0.9379


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 5.0254 - Val Accuracy: 0.9273


Training:   0%|          | 0/88 [00:00<?, ?it/s]

Epoch: 11/24 - Loss: 4.4910 - Accuracy: 0.9398


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 4.7278 - Val Accuracy: 0.9284


Training:   0%|          | 0/88 [00:00<?, ?it/s]

Epoch: 12/24 - Loss: 4.4121 - Accuracy: 0.9400


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 4.7406 - Val Accuracy: 0.9249


Training:   0%|          | 0/88 [00:00<?, ?it/s]

Epoch: 13/24 - Loss: 4.2871 - Accuracy: 0.9425


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 4.3689 - Val Accuracy: 0.9353


Training:   0%|          | 0/88 [00:00<?, ?it/s]

Epoch: 14/24 - Loss: 4.2344 - Accuracy: 0.9421


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 4.6664 - Val Accuracy: 0.9306


Training:   0%|          | 0/88 [00:00<?, ?it/s]

Epoch: 15/24 - Loss: 4.1894 - Accuracy: 0.9438


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 4.3081 - Val Accuracy: 0.9393


Training:   0%|          | 0/88 [00:00<?, ?it/s]

Epoch: 16/24 - Loss: 4.1277 - Accuracy: 0.9425


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 4.3182 - Val Accuracy: 0.9375


Training:   0%|          | 0/88 [00:00<?, ?it/s]

Epoch: 17/24 - Loss: 4.0808 - Accuracy: 0.9428


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 4.3630 - Val Accuracy: 0.9393


Training:   0%|          | 0/88 [00:00<?, ?it/s]

Epoch: 18/24 - Loss: 4.0941 - Accuracy: 0.9419


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 4.3125 - Val Accuracy: 0.9347


Training:   0%|          | 0/88 [00:00<?, ?it/s]

Epoch: 19/24 - Loss: 3.9410 - Accuracy: 0.9443


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 4.1786 - Val Accuracy: 0.9393


Training:   0%|          | 0/88 [00:00<?, ?it/s]

Epoch: 20/24 - Loss: 3.9047 - Accuracy: 0.9461


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 4.2309 - Val Accuracy: 0.9364


Training:   0%|          | 0/88 [00:00<?, ?it/s]

Epoch: 21/24 - Loss: 3.8916 - Accuracy: 0.9458


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 4.7607 - Val Accuracy: 0.9209


Training:   0%|          | 0/88 [00:00<?, ?it/s]

Epoch: 22/24 - Loss: 3.9683 - Accuracy: 0.9444


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 4.4436 - Val Accuracy: 0.9290


Training:   0%|          | 0/88 [00:00<?, ?it/s]

Epoch: 23/24 - Loss: 3.8303 - Accuracy: 0.9481


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 4.1067 - Val Accuracy: 0.9387


Training:   0%|          | 0/88 [00:00<?, ?it/s]

Epoch: 24/24 - Loss: 3.7930 - Accuracy: 0.9470


  0%|          | 0/11 [00:00<?, ?it/s]

[32m[I 2023-12-15 03:30:47,805][0m Trial 1 finished with value: 0.9370035529136658 and parameters: {'loss_learning_rate': 0.002924319432683996, 'learning_rate': 8.968383341493482e-05, 'weight_decay': 0.00032283286559799346, 'epsilon': 6.595985880937434e-09, 'beta': 0.6, 'gamma': 0.30000000000000004, 'theta': 0.6, 'batch_size': 159, 'epochs': 24}. Best is trial 1 with value: 0.9370035529136658.[0m


Val Loss: 4.1150 - Val Accuracy: 0.9370
Saving best model...
Learning rate for Loss: 0.0005386972203835065
Learning rate: 1.681845977859957e-05
Weight decay: 0.003244036564460812
Epsilon: 1.8414959377464864e-08
Beta: 0.6
Gamma: 0.5
Theta: 0.8
Batch size: 257
Number of epochs: 26


Epochs:   0%|          | 0/26 [00:00<?, ?it/s]

Training:   0%|          | 0/55 [00:00<?, ?it/s]

Epoch: 1/26 - Loss: 38.0746 - Accuracy: 0.5145


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 36.0092 - Val Accuracy: 0.5273


Training:   0%|          | 0/55 [00:00<?, ?it/s]

Epoch: 2/26 - Loss: 34.3176 - Accuracy: 0.5661


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 32.5063 - Val Accuracy: 0.5786


Training:   0%|          | 0/55 [00:00<?, ?it/s]

Epoch: 3/26 - Loss: 31.0058 - Accuracy: 0.6153


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 29.4805 - Val Accuracy: 0.6510


Training:   0%|          | 0/55 [00:00<?, ?it/s]

Epoch: 4/26 - Loss: 28.0353 - Accuracy: 0.6738


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 26.2185 - Val Accuracy: 0.6986


Training:   0%|          | 0/55 [00:00<?, ?it/s]

Epoch: 5/26 - Loss: 24.9707 - Accuracy: 0.7301


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 23.1907 - Val Accuracy: 0.7612


Training:   0%|          | 0/55 [00:00<?, ?it/s]

Epoch: 6/26 - Loss: 22.0221 - Accuracy: 0.7810


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 20.5986 - Val Accuracy: 0.7982


Training:   0%|          | 0/55 [00:00<?, ?it/s]

Epoch: 7/26 - Loss: 19.6860 - Accuracy: 0.8115


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 18.4518 - Val Accuracy: 0.8216


Training:   0%|          | 0/55 [00:00<?, ?it/s]

Epoch: 8/26 - Loss: 17.8008 - Accuracy: 0.8333


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 16.8736 - Val Accuracy: 0.8471


Training:   0%|          | 0/55 [00:00<?, ?it/s]

Epoch: 9/26 - Loss: 16.2605 - Accuracy: 0.8478


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 15.2312 - Val Accuracy: 0.8603


Training:   0%|          | 0/55 [00:00<?, ?it/s]

Epoch: 10/26 - Loss: 14.8629 - Accuracy: 0.8593


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 13.9096 - Val Accuracy: 0.8776


Training:   0%|          | 0/55 [00:00<?, ?it/s]

Epoch: 11/26 - Loss: 13.7794 - Accuracy: 0.8693


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 13.1025 - Val Accuracy: 0.8803


Training:   0%|          | 0/55 [00:00<?, ?it/s]

Epoch: 12/26 - Loss: 12.8870 - Accuracy: 0.8770


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 12.3800 - Val Accuracy: 0.8865


Training:   0%|          | 0/55 [00:00<?, ?it/s]

Epoch: 13/26 - Loss: 12.1891 - Accuracy: 0.8835


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 11.6680 - Val Accuracy: 0.8937


Training:   0%|          | 0/55 [00:00<?, ?it/s]

Epoch: 14/26 - Loss: 11.6211 - Accuracy: 0.8891


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 11.3774 - Val Accuracy: 0.8911


Training:   0%|          | 0/55 [00:00<?, ?it/s]

Epoch: 15/26 - Loss: 11.1728 - Accuracy: 0.8923


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 10.8436 - Val Accuracy: 0.8959


Training:   0%|          | 0/55 [00:00<?, ?it/s]

Epoch: 16/26 - Loss: 10.6912 - Accuracy: 0.8985


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 10.4015 - Val Accuracy: 0.8991


Training:   0%|          | 0/55 [00:00<?, ?it/s]

Epoch: 17/26 - Loss: 10.2466 - Accuracy: 0.9018


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 10.0286 - Val Accuracy: 0.9027


Training:   0%|          | 0/55 [00:00<?, ?it/s]

Epoch: 18/26 - Loss: 9.8574 - Accuracy: 0.9056


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 9.6973 - Val Accuracy: 0.9067


Training:   0%|          | 0/55 [00:00<?, ?it/s]

Epoch: 19/26 - Loss: 9.6456 - Accuracy: 0.9071


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 9.4012 - Val Accuracy: 0.9038


Training:   0%|          | 0/55 [00:00<?, ?it/s]

Epoch: 20/26 - Loss: 9.3780 - Accuracy: 0.9106


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 9.1273 - Val Accuracy: 0.9126


Training:   0%|          | 0/55 [00:00<?, ?it/s]

Epoch: 21/26 - Loss: 9.0582 - Accuracy: 0.9131


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 8.9751 - Val Accuracy: 0.9102


Training:   0%|          | 0/55 [00:00<?, ?it/s]

Epoch: 22/26 - Loss: 8.9532 - Accuracy: 0.9152


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 8.6831 - Val Accuracy: 0.9102


Training:   0%|          | 0/55 [00:00<?, ?it/s]

Epoch: 23/26 - Loss: 8.6598 - Accuracy: 0.9169


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 8.5309 - Val Accuracy: 0.9140


Training:   0%|          | 0/55 [00:00<?, ?it/s]

Epoch: 24/26 - Loss: 8.5189 - Accuracy: 0.9178


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 8.3563 - Val Accuracy: 0.9151


Training:   0%|          | 0/55 [00:00<?, ?it/s]

Epoch: 25/26 - Loss: 8.3116 - Accuracy: 0.9194


  0%|          | 0/7 [00:00<?, ?it/s]

Val Loss: 8.3284 - Val Accuracy: 0.9157


Training:   0%|          | 0/55 [00:00<?, ?it/s]

Epoch: 26/26 - Loss: 8.1700 - Accuracy: 0.9204


  0%|          | 0/7 [00:00<?, ?it/s]

[32m[I 2023-12-15 03:44:27,834][0m Trial 2 finished with value: 0.9166258573532104 and parameters: {'loss_learning_rate': 0.0005386972203835065, 'learning_rate': 1.681845977859957e-05, 'weight_decay': 0.003244036564460812, 'epsilon': 1.8414959377464864e-08, 'beta': 0.6, 'gamma': 0.5, 'theta': 0.8, 'batch_size': 257, 'epochs': 26}. Best is trial 1 with value: 0.9370035529136658.[0m


Val Loss: 8.2686 - Val Accuracy: 0.9166
Learning rate for Loss: 0.0054702503735216835
Learning rate: 0.009202269004514272
Weight decay: 0.0007388017682293908
Epsilon: 5.572286169866689e-09
Beta: 0.4
Gamma: 0.30000000000000004
Theta: 0.8
Batch size: 197
Number of epochs: 66


Epochs:   0%|          | 0/66 [00:00<?, ?it/s]

Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 1/66 - Loss: 25.6448 - Accuracy: 0.8108


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 15.7426 - Val Accuracy: 0.8964


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 2/66 - Loss: 13.6912 - Accuracy: 0.9087


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 12.4518 - Val Accuracy: 0.9109


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 3/66 - Loss: 12.8954 - Accuracy: 0.9224


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 14.7914 - Val Accuracy: 0.9153


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 4/66 - Loss: 13.2008 - Accuracy: 0.9250


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 14.9418 - Val Accuracy: 0.9181


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 5/66 - Loss: 14.5825 - Accuracy: 0.9289


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 17.1381 - Val Accuracy: 0.9203


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 6/66 - Loss: 15.0723 - Accuracy: 0.9236


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 17.5112 - Val Accuracy: 0.9215


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 7/66 - Loss: 15.7201 - Accuracy: 0.9287


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 14.8704 - Val Accuracy: 0.9362


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 8/66 - Loss: 15.4219 - Accuracy: 0.9340


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 16.7079 - Val Accuracy: 0.9235


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 9/66 - Loss: 16.2244 - Accuracy: 0.9357


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 15.4895 - Val Accuracy: 0.9360


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 10/66 - Loss: 16.6576 - Accuracy: 0.9370


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 17.2229 - Val Accuracy: 0.9334


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 11/66 - Loss: 18.2718 - Accuracy: 0.9370


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 14.7359 - Val Accuracy: 0.9327


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 12/66 - Loss: 18.5822 - Accuracy: 0.9343


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 17.2894 - Val Accuracy: 0.9322


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 13/66 - Loss: 16.9002 - Accuracy: 0.9387


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 18.0027 - Val Accuracy: 0.9385


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 14/66 - Loss: 17.2205 - Accuracy: 0.9354


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 18.6952 - Val Accuracy: 0.9343


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 15/66 - Loss: 18.5382 - Accuracy: 0.9362


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 20.8879 - Val Accuracy: 0.9038


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 16/66 - Loss: 19.7205 - Accuracy: 0.9331


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 20.9278 - Val Accuracy: 0.9252


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 17/66 - Loss: 19.6295 - Accuracy: 0.9326


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 20.1711 - Val Accuracy: 0.9302


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 18/66 - Loss: 17.7071 - Accuracy: 0.9389


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 19.1449 - Val Accuracy: 0.9288


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 19/66 - Loss: 17.6862 - Accuracy: 0.9371


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 18.7322 - Val Accuracy: 0.9309


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 20/66 - Loss: 18.5118 - Accuracy: 0.9403


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 21.9373 - Val Accuracy: 0.9341


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 21/66 - Loss: 18.6212 - Accuracy: 0.9399


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 20.3715 - Val Accuracy: 0.9194


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 22/66 - Loss: 19.0290 - Accuracy: 0.9356


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 14.8900 - Val Accuracy: 0.9375


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 23/66 - Loss: 19.0426 - Accuracy: 0.9298


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 21.5948 - Val Accuracy: 0.9253


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 24/66 - Loss: 18.9720 - Accuracy: 0.9343


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 19.3066 - Val Accuracy: 0.9308


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 25/66 - Loss: 20.0765 - Accuracy: 0.9360


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 21.8279 - Val Accuracy: 0.9320


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 26/66 - Loss: 19.5690 - Accuracy: 0.9365


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 20.4546 - Val Accuracy: 0.9325


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 27/66 - Loss: 18.7606 - Accuracy: 0.9364


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 20.3130 - Val Accuracy: 0.9156


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 28/66 - Loss: 19.1379 - Accuracy: 0.9367


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 20.6064 - Val Accuracy: 0.9274


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 29/66 - Loss: 19.7184 - Accuracy: 0.9323


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 18.8784 - Val Accuracy: 0.9283


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 30/66 - Loss: 19.4839 - Accuracy: 0.9397


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 20.1721 - Val Accuracy: 0.9370


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 31/66 - Loss: 19.7916 - Accuracy: 0.9376


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 19.6327 - Val Accuracy: 0.9207


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 32/66 - Loss: 19.4699 - Accuracy: 0.9323


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 20.6774 - Val Accuracy: 0.9355


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 33/66 - Loss: 19.8889 - Accuracy: 0.9359


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 19.0367 - Val Accuracy: 0.9377


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 34/66 - Loss: 18.9230 - Accuracy: 0.9298


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 20.8218 - Val Accuracy: 0.9261


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 35/66 - Loss: 19.9663 - Accuracy: 0.9264


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 20.9680 - Val Accuracy: 0.9352


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 36/66 - Loss: 20.2891 - Accuracy: 0.9277


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 18.8617 - Val Accuracy: 0.9374


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 37/66 - Loss: 20.2233 - Accuracy: 0.9382


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 19.0966 - Val Accuracy: 0.9307


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 38/66 - Loss: 19.6607 - Accuracy: 0.9379


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 21.0359 - Val Accuracy: 0.9298


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 39/66 - Loss: 20.4568 - Accuracy: 0.9350


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 21.3835 - Val Accuracy: 0.9100


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 40/66 - Loss: 19.6852 - Accuracy: 0.9365


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 20.2060 - Val Accuracy: 0.9385


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 41/66 - Loss: 20.5050 - Accuracy: 0.9274


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 21.7771 - Val Accuracy: 0.9216


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 42/66 - Loss: 19.7194 - Accuracy: 0.9366


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 21.0184 - Val Accuracy: 0.8932


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 43/66 - Loss: 20.7955 - Accuracy: 0.9211


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 22.1005 - Val Accuracy: 0.9321


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 44/66 - Loss: 19.5860 - Accuracy: 0.9345


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 23.3756 - Val Accuracy: 0.9334


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 45/66 - Loss: 21.4018 - Accuracy: 0.9368


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 19.6950 - Val Accuracy: 0.9274


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 46/66 - Loss: 20.0557 - Accuracy: 0.9339


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 23.4345 - Val Accuracy: 0.9082


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 47/66 - Loss: 19.8323 - Accuracy: 0.9231


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 22.7554 - Val Accuracy: 0.9131


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 48/66 - Loss: 21.0296 - Accuracy: 0.9333


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 22.3383 - Val Accuracy: 0.9242


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 49/66 - Loss: 20.4876 - Accuracy: 0.9299


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 19.3985 - Val Accuracy: 0.9314


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 50/66 - Loss: 20.1495 - Accuracy: 0.9386


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 19.1129 - Val Accuracy: 0.9320


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 51/66 - Loss: 20.0413 - Accuracy: 0.9368


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 24.1899 - Val Accuracy: 0.8868


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 52/66 - Loss: 18.6182 - Accuracy: 0.9392


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 22.8386 - Val Accuracy: 0.9341


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 53/66 - Loss: 19.8919 - Accuracy: 0.9362


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 21.2547 - Val Accuracy: 0.9304


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 54/66 - Loss: 19.8992 - Accuracy: 0.9319


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 19.7572 - Val Accuracy: 0.9388


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 55/66 - Loss: 21.0858 - Accuracy: 0.9243


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 21.3980 - Val Accuracy: 0.9379


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 56/66 - Loss: 20.4084 - Accuracy: 0.9266


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 21.4331 - Val Accuracy: 0.9323


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 57/66 - Loss: 19.9613 - Accuracy: 0.9371


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 21.6113 - Val Accuracy: 0.9280


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 58/66 - Loss: 19.2844 - Accuracy: 0.9336


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 21.4381 - Val Accuracy: 0.9315


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 59/66 - Loss: 19.9661 - Accuracy: 0.9284


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 21.1683 - Val Accuracy: 0.9434


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 60/66 - Loss: 20.7531 - Accuracy: 0.9091


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 23.0511 - Val Accuracy: 0.9262


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 61/66 - Loss: 21.7348 - Accuracy: 0.9247


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 22.2162 - Val Accuracy: 0.9206


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 62/66 - Loss: 19.9746 - Accuracy: 0.9350


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 21.8199 - Val Accuracy: 0.9216


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 63/66 - Loss: 20.3733 - Accuracy: 0.9302


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 19.4223 - Val Accuracy: 0.9378


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 64/66 - Loss: 19.9181 - Accuracy: 0.9280


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 25.4623 - Val Accuracy: 0.9348


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 65/66 - Loss: 20.2834 - Accuracy: 0.9348


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 23.1159 - Val Accuracy: 0.9136


Training:   0%|          | 0/71 [00:00<?, ?it/s]

Epoch: 66/66 - Loss: 20.5294 - Accuracy: 0.9358


  0%|          | 0/9 [00:00<?, ?it/s]

[32m[I 2023-12-15 04:18:39,831][0m Trial 3 finished with value: 0.8987674713134766 and parameters: {'loss_learning_rate': 0.0054702503735216835, 'learning_rate': 0.009202269004514272, 'weight_decay': 0.0007388017682293908, 'epsilon': 5.572286169866689e-09, 'beta': 0.4, 'gamma': 0.30000000000000004, 'theta': 0.8, 'batch_size': 197, 'epochs': 66}. Best is trial 1 with value: 0.9370035529136658.[0m


Val Loss: 21.5027 - Val Accuracy: 0.8988
Learning rate for Loss: 0.00014334037370261347
Learning rate: 0.05150753160551198
Weight decay: 0.0001374326272149175
Epsilon: 1.2094433518129794e-08
Beta: 0.5
Gamma: 0.9
Theta: 0.4
Batch size: 206
Number of epochs: 55


Epochs:   0%|          | 0/55 [00:00<?, ?it/s]

Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 1/55 - Loss: 271.9030 - Accuracy: 0.7642


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 191.3868 - Val Accuracy: 0.8782


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 2/55 - Loss: 287.6331 - Accuracy: 0.8618


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 353.3134 - Val Accuracy: 0.9013


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 3/55 - Loss: 381.7411 - Accuracy: 0.8692


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 384.2695 - Val Accuracy: 0.9159


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 4/55 - Loss: 478.2775 - Accuracy: 0.8974


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 544.7665 - Val Accuracy: 0.8991


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 5/55 - Loss: 514.6498 - Accuracy: 0.8968


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 539.7322 - Val Accuracy: 0.8942


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 6/55 - Loss: 524.5865 - Accuracy: 0.9095


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 700.5447 - Val Accuracy: 0.9093


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 7/55 - Loss: 542.7145 - Accuracy: 0.9034


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 540.2585 - Val Accuracy: 0.9070


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 8/55 - Loss: 534.9491 - Accuracy: 0.9081


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 487.7867 - Val Accuracy: 0.9224


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 9/55 - Loss: 524.1468 - Accuracy: 0.9031


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 477.7650 - Val Accuracy: 0.9267


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 10/55 - Loss: 512.0501 - Accuracy: 0.9118


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 622.2100 - Val Accuracy: 0.9189


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 11/55 - Loss: 535.0046 - Accuracy: 0.9131


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 667.4280 - Val Accuracy: 0.8343


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 12/55 - Loss: 546.4609 - Accuracy: 0.9086


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 541.8692 - Val Accuracy: 0.8810


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 13/55 - Loss: 488.1210 - Accuracy: 0.9127


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 512.3475 - Val Accuracy: 0.9317


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 14/55 - Loss: 468.1686 - Accuracy: 0.9120


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 570.1423 - Val Accuracy: 0.9257


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 15/55 - Loss: 505.0452 - Accuracy: 0.9089


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 597.8038 - Val Accuracy: 0.8796


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 16/55 - Loss: 487.1719 - Accuracy: 0.9109


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 556.3682 - Val Accuracy: 0.9343


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 17/55 - Loss: 516.9171 - Accuracy: 0.9168


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 569.4211 - Val Accuracy: 0.9340


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 18/55 - Loss: 542.7261 - Accuracy: 0.9156


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 575.7581 - Val Accuracy: 0.9229


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 19/55 - Loss: 514.1974 - Accuracy: 0.9169


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 488.4901 - Val Accuracy: 0.9125


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 20/55 - Loss: 509.9046 - Accuracy: 0.9152


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 529.0381 - Val Accuracy: 0.9111


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 21/55 - Loss: 524.6517 - Accuracy: 0.9146


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 498.7905 - Val Accuracy: 0.9037


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 22/55 - Loss: 522.0397 - Accuracy: 0.9107


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 548.9431 - Val Accuracy: 0.9269


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 23/55 - Loss: 528.1342 - Accuracy: 0.9194


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 633.0838 - Val Accuracy: 0.9295


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 24/55 - Loss: 583.0814 - Accuracy: 0.9127


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 569.7098 - Val Accuracy: 0.9278


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 25/55 - Loss: 538.9450 - Accuracy: 0.9210


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 620.4077 - Val Accuracy: 0.9332


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 26/55 - Loss: 557.9545 - Accuracy: 0.9138


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 645.9598 - Val Accuracy: 0.9120


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 27/55 - Loss: 586.1323 - Accuracy: 0.9154


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 646.9444 - Val Accuracy: 0.9190


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 28/55 - Loss: 558.9164 - Accuracy: 0.9165


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 555.8726 - Val Accuracy: 0.8946


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 29/55 - Loss: 559.5134 - Accuracy: 0.9199


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 558.8235 - Val Accuracy: 0.9173


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 30/55 - Loss: 579.2491 - Accuracy: 0.9145


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 643.4333 - Val Accuracy: 0.9206


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 31/55 - Loss: 555.4955 - Accuracy: 0.9196


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 543.0916 - Val Accuracy: 0.8745


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 32/55 - Loss: 559.5805 - Accuracy: 0.9126


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 556.0943 - Val Accuracy: 0.9200


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 33/55 - Loss: 580.1224 - Accuracy: 0.9173


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 553.2977 - Val Accuracy: 0.9202


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 34/55 - Loss: 535.3759 - Accuracy: 0.9199


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 536.4460 - Val Accuracy: 0.9203


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 35/55 - Loss: 550.1733 - Accuracy: 0.9197


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 547.3008 - Val Accuracy: 0.8405


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 36/55 - Loss: 535.6696 - Accuracy: 0.9158


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 590.4286 - Val Accuracy: 0.9300


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 37/55 - Loss: 576.9396 - Accuracy: 0.9176


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 607.1710 - Val Accuracy: 0.9154


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 38/55 - Loss: 546.9994 - Accuracy: 0.9189


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 633.2750 - Val Accuracy: 0.9066


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 39/55 - Loss: 542.9780 - Accuracy: 0.9176


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 650.4020 - Val Accuracy: 0.9148


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 40/55 - Loss: 573.3983 - Accuracy: 0.9180


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 612.1233 - Val Accuracy: 0.9111


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 41/55 - Loss: 598.1119 - Accuracy: 0.9180


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 657.5984 - Val Accuracy: 0.9012


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 42/55 - Loss: 571.0573 - Accuracy: 0.9191


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 627.7940 - Val Accuracy: 0.9273


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 43/55 - Loss: 560.6689 - Accuracy: 0.9165


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 561.2298 - Val Accuracy: 0.9245


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 44/55 - Loss: 582.2038 - Accuracy: 0.9197


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 579.4456 - Val Accuracy: 0.9218


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 45/55 - Loss: 567.7935 - Accuracy: 0.9157


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 660.5632 - Val Accuracy: 0.9206


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 46/55 - Loss: 568.6349 - Accuracy: 0.9155


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 666.2596 - Val Accuracy: 0.9289


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 47/55 - Loss: 539.5741 - Accuracy: 0.9231


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 559.1552 - Val Accuracy: 0.9126


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 48/55 - Loss: 512.9437 - Accuracy: 0.9223


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 569.9210 - Val Accuracy: 0.9196


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 49/55 - Loss: 557.1842 - Accuracy: 0.9179


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 537.8292 - Val Accuracy: 0.9410


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 50/55 - Loss: 513.3461 - Accuracy: 0.9277


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 651.7761 - Val Accuracy: 0.9128


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 51/55 - Loss: 567.3463 - Accuracy: 0.9177


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 530.9289 - Val Accuracy: 0.9306


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 52/55 - Loss: 575.1226 - Accuracy: 0.9201


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 636.8415 - Val Accuracy: 0.9054


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 53/55 - Loss: 562.2879 - Accuracy: 0.9224


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 574.0555 - Val Accuracy: 0.9328


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 54/55 - Loss: 599.9982 - Accuracy: 0.9244


  0%|          | 0/9 [00:00<?, ?it/s]

Val Loss: 581.0154 - Val Accuracy: 0.9207


Training:   0%|          | 0/68 [00:00<?, ?it/s]

Epoch: 55/55 - Loss: 586.9237 - Accuracy: 0.9169


  0%|          | 0/9 [00:00<?, ?it/s]

[32m[I 2023-12-15 04:46:52,198][0m Trial 4 finished with value: 0.9066269993782043 and parameters: {'loss_learning_rate': 0.00014334037370261347, 'learning_rate': 0.05150753160551198, 'weight_decay': 0.0001374326272149175, 'epsilon': 1.2094433518129794e-08, 'beta': 0.5, 'gamma': 0.9, 'theta': 0.4, 'batch_size': 206, 'epochs': 55}. Best is trial 1 with value: 0.9370035529136658.[0m


Val Loss: 602.8806 - Val Accuracy: 0.9066
Learning rate for Loss: 0.000588679538223633
Learning rate: 2.885127760273135e-05
Weight decay: 0.00028789709113813714
Epsilon: 1.6164824290487004e-08
Beta: 0.5
Gamma: 0.4
Theta: 0.5
Batch size: 68
Number of epochs: 70


Epochs:   0%|          | 0/70 [00:00<?, ?it/s]

Training:   0%|          | 0/206 [00:00<?, ?it/s]

Epoch: 1/70 - Loss: 23.4544 - Accuracy: 0.5770


  0%|          | 0/26 [00:00<?, ?it/s]

[32m[I 2023-12-15 04:47:23,335][0m Trial 5 pruned. [0m


Val Loss: 18.8751 - Val Accuracy: 0.6996
Learning rate for Loss: 0.0026082441830565517
Learning rate: 0.021826790860257798
Weight decay: 0.0019808449263245065
Epsilon: 4.594111289914856e-09
Beta: 0.5
Gamma: 0.6
Theta: 0.30000000000000004
Batch size: 104
Number of epochs: 96


Epochs:   0%|          | 0/96 [00:00<?, ?it/s]

Training:   0%|          | 0/135 [00:00<?, ?it/s]

Epoch: 1/96 - Loss: 109.4239 - Accuracy: 0.8045


  0%|          | 0/17 [00:00<?, ?it/s]

[32m[I 2023-12-15 04:47:54,490][0m Trial 6 pruned. [0m


Val Loss: 142.0440 - Val Accuracy: 0.8032
Learning rate for Loss: 0.000333646037008515
Learning rate: 2.504949040408788e-05
Weight decay: 0.0003438370627176441
Epsilon: 4.6719395255810734e-09
Beta: 0.30000000000000004
Gamma: 0.7000000000000001
Theta: 0.4
Batch size: 122
Number of epochs: 96


Epochs:   0%|          | 0/96 [00:00<?, ?it/s]

Training:   0%|          | 0/115 [00:00<?, ?it/s]

Epoch: 1/96 - Loss: 28.7778 - Accuracy: 0.5309


  0%|          | 0/15 [00:00<?, ?it/s]

[32m[I 2023-12-15 04:48:25,566][0m Trial 7 pruned. [0m


Val Loss: 24.1202 - Val Accuracy: 0.5661
Learning rate for Loss: 0.00020414772099151932
Learning rate: 0.006264639503957515
Weight decay: 0.0095182578280005
Epsilon: 2.264287352722685e-08
Beta: 0.6
Gamma: 0.9
Theta: 0.7000000000000001
Batch size: 193
Number of epochs: 74


Epochs:   0%|          | 0/74 [00:00<?, ?it/s]

Training:   0%|          | 0/73 [00:00<?, ?it/s]

Epoch: 1/74 - Loss: 24.9069 - Accuracy: 0.8019


  0%|          | 0/10 [00:00<?, ?it/s]

Val Loss: 16.5968 - Val Accuracy: 0.8973


Training:   0%|          | 0/73 [00:00<?, ?it/s]

Epoch: 2/74 - Loss: 14.8290 - Accuracy: 0.9166


  0%|          | 0/10 [00:00<?, ?it/s]

[32m[I 2023-12-15 04:49:28,942][0m Trial 8 pruned. [0m


Val Loss: 16.3886 - Val Accuracy: 0.8704
Learning rate for Loss: 0.0007226996894730965
Learning rate: 0.004942955578672351
Weight decay: 0.0005584515641903908
Epsilon: 9.253079182957533e-08
Beta: 0.5
Gamma: 0.7000000000000001
Theta: 0.7000000000000001
Batch size: 127
Number of epochs: 77


Epochs:   0%|          | 0/77 [00:00<?, ?it/s]

Training:   0%|          | 0/111 [00:00<?, ?it/s]

Epoch: 1/77 - Loss: 19.0616 - Accuracy: 0.8669


  0%|          | 0/14 [00:00<?, ?it/s]

[32m[I 2023-12-15 04:50:00,140][0m Trial 9 pruned. [0m


Val Loss: 17.4117 - Val Accuracy: 0.8739
Learning rate for Loss: 0.001937798668445542
Learning rate: 0.0002772336230334728
Weight decay: 0.00011215196125213501
Epsilon: 1.7713145459677538e-09
Beta: 0.1
Gamma: 0.2
Theta: 0.1
Batch size: 297
Number of epochs: 10


Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Training:   0%|          | 0/48 [00:00<?, ?it/s]

Epoch: 1/10 - Loss: 6.4254 - Accuracy: 0.7560


  0%|          | 0/6 [00:00<?, ?it/s]

[32m[I 2023-12-15 04:50:32,362][0m Trial 10 pruned. [0m


Val Loss: 4.6354 - Val Accuracy: 0.8677
Learning rate for Loss: 0.008041559116218412
Learning rate: 0.00037991345798925633
Weight decay: 0.00967944903244954
Epsilon: 9.011779972509285e-08
Beta: 1.0
Gamma: 0.1
Theta: 1.0
Batch size: 158
Number of epochs: 43


Epochs:   0%|          | 0/43 [00:00<?, ?it/s]

Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 1/43 - Loss: 15.2113 - Accuracy: 0.8253


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 7.8628 - Val Accuracy: 0.9198


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 2/43 - Loss: 7.0945 - Accuracy: 0.9221


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 6.3027 - Val Accuracy: 0.9357


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 3/43 - Loss: 6.1065 - Accuracy: 0.9307


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 6.2566 - Val Accuracy: 0.9190


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 4/43 - Loss: 5.6517 - Accuracy: 0.9349


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 5.6646 - Val Accuracy: 0.9367


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 5/43 - Loss: 5.6362 - Accuracy: 0.9335


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 8.1541 - Val Accuracy: 0.8966


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 6/43 - Loss: 5.4673 - Accuracy: 0.9353


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 5.3072 - Val Accuracy: 0.9325


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 7/43 - Loss: 5.2702 - Accuracy: 0.9393


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 5.7454 - Val Accuracy: 0.9293


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 8/43 - Loss: 5.0245 - Accuracy: 0.9395


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.5599 - Val Accuracy: 0.9467


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 9/43 - Loss: 4.7881 - Accuracy: 0.9436


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.8821 - Val Accuracy: 0.9393


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 10/43 - Loss: 5.1259 - Accuracy: 0.9394


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.7352 - Val Accuracy: 0.9441


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 11/43 - Loss: 5.0078 - Accuracy: 0.9385


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 5.5583 - Val Accuracy: 0.9362


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 12/43 - Loss: 4.7185 - Accuracy: 0.9425


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.6025 - Val Accuracy: 0.9446


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 13/43 - Loss: 4.5050 - Accuracy: 0.9455


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 5.5791 - Val Accuracy: 0.9206


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 14/43 - Loss: 4.7323 - Accuracy: 0.9431


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.5494 - Val Accuracy: 0.9446


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 15/43 - Loss: 4.6571 - Accuracy: 0.9434


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.9400 - Val Accuracy: 0.9383


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 16/43 - Loss: 4.6075 - Accuracy: 0.9428


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.6753 - Val Accuracy: 0.9436


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 17/43 - Loss: 4.5078 - Accuracy: 0.9445


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.8758 - Val Accuracy: 0.9436


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 18/43 - Loss: 4.4972 - Accuracy: 0.9445


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.4691 - Val Accuracy: 0.9441


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 19/43 - Loss: 4.9057 - Accuracy: 0.9371


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.8740 - Val Accuracy: 0.9383


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 20/43 - Loss: 4.5304 - Accuracy: 0.9444


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 5.3274 - Val Accuracy: 0.9269


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 21/43 - Loss: 4.3801 - Accuracy: 0.9475


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.7077 - Val Accuracy: 0.9367


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 22/43 - Loss: 4.2281 - Accuracy: 0.9476


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.7479 - Val Accuracy: 0.9399


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 23/43 - Loss: 4.4365 - Accuracy: 0.9453


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 5.1070 - Val Accuracy: 0.9399


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 24/43 - Loss: 4.4151 - Accuracy: 0.9459


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 5.0745 - Val Accuracy: 0.9346


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 25/43 - Loss: 4.2355 - Accuracy: 0.9477


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.4898 - Val Accuracy: 0.9467


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 26/43 - Loss: 4.2971 - Accuracy: 0.9463


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.6451 - Val Accuracy: 0.9409


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 27/43 - Loss: 4.3701 - Accuracy: 0.9444


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 5.0693 - Val Accuracy: 0.9425


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 28/43 - Loss: 4.2807 - Accuracy: 0.9481


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.2825 - Val Accuracy: 0.9441


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 29/43 - Loss: 4.2220 - Accuracy: 0.9483


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.7908 - Val Accuracy: 0.9409


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 30/43 - Loss: 4.4728 - Accuracy: 0.9440


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 5.3977 - Val Accuracy: 0.9372


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 31/43 - Loss: 4.1558 - Accuracy: 0.9485


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.4384 - Val Accuracy: 0.9430


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 32/43 - Loss: 4.2252 - Accuracy: 0.9456


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.5695 - Val Accuracy: 0.9420


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 33/43 - Loss: 4.3112 - Accuracy: 0.9473


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.5634 - Val Accuracy: 0.9420


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 34/43 - Loss: 4.2382 - Accuracy: 0.9457


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.7901 - Val Accuracy: 0.9404


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 35/43 - Loss: 4.2892 - Accuracy: 0.9468


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 5.7113 - Val Accuracy: 0.9277


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 36/43 - Loss: 4.5118 - Accuracy: 0.9441


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.4661 - Val Accuracy: 0.9404


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 37/43 - Loss: 4.1170 - Accuracy: 0.9503


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.4620 - Val Accuracy: 0.9457


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 38/43 - Loss: 4.0346 - Accuracy: 0.9505


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.7145 - Val Accuracy: 0.9388


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 39/43 - Loss: 4.1112 - Accuracy: 0.9484


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.0446 - Val Accuracy: 0.9478


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 40/43 - Loss: 4.1248 - Accuracy: 0.9487


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.1408 - Val Accuracy: 0.9441


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 41/43 - Loss: 4.5000 - Accuracy: 0.9427


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.3021 - Val Accuracy: 0.9436


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 42/43 - Loss: 4.0468 - Accuracy: 0.9479


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.3849 - Val Accuracy: 0.9393


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 43/43 - Loss: 4.0747 - Accuracy: 0.9488


  0%|          | 0/12 [00:00<?, ?it/s]

[32m[I 2023-12-15 05:12:53,032][0m Trial 11 finished with value: 0.9456751346588135 and parameters: {'loss_learning_rate': 0.008041559116218412, 'learning_rate': 0.00037991345798925633, 'weight_decay': 0.00967944903244954, 'epsilon': 9.011779972509285e-08, 'beta': 1.0, 'gamma': 0.1, 'theta': 1.0, 'batch_size': 158, 'epochs': 43}. Best is trial 11 with value: 0.9456751346588135.[0m


Val Loss: 4.3024 - Val Accuracy: 0.9457
Saving best model...
Learning rate for Loss: 0.009290401460456234
Learning rate: 0.0002884843017216499
Weight decay: 0.002140692449457409
Epsilon: 5.233123983056263e-08
Beta: 1.0
Gamma: 0.1
Theta: 1.0
Batch size: 152
Number of epochs: 37


Epochs:   0%|          | 0/37 [00:00<?, ?it/s]

Training:   0%|          | 0/92 [00:00<?, ?it/s]

Epoch: 1/37 - Loss: 15.8659 - Accuracy: 0.8086


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 8.3458 - Val Accuracy: 0.9015


Training:   0%|          | 0/92 [00:00<?, ?it/s]

Epoch: 2/37 - Loss: 7.5928 - Accuracy: 0.9112


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 7.1077 - Val Accuracy: 0.9226


Training:   0%|          | 0/92 [00:00<?, ?it/s]

Epoch: 3/37 - Loss: 6.5697 - Accuracy: 0.9256


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 5.9072 - Val Accuracy: 0.9286


Training:   0%|          | 0/92 [00:00<?, ?it/s]

Epoch: 4/37 - Loss: 5.9475 - Accuracy: 0.9325


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 5.5338 - Val Accuracy: 0.9347


Training:   0%|          | 0/92 [00:00<?, ?it/s]

Epoch: 5/37 - Loss: 5.5564 - Accuracy: 0.9370


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 6.1004 - Val Accuracy: 0.9224


Training:   0%|          | 0/92 [00:00<?, ?it/s]

Epoch: 6/37 - Loss: 5.3781 - Accuracy: 0.9365


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 5.1816 - Val Accuracy: 0.9380


Training:   0%|          | 0/92 [00:00<?, ?it/s]

Epoch: 7/37 - Loss: 5.4672 - Accuracy: 0.9344


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 5.4793 - Val Accuracy: 0.9340


Training:   0%|          | 0/92 [00:00<?, ?it/s]

Epoch: 8/37 - Loss: 5.0286 - Accuracy: 0.9393


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.9622 - Val Accuracy: 0.9324


Training:   0%|          | 0/92 [00:00<?, ?it/s]

Epoch: 9/37 - Loss: 4.8840 - Accuracy: 0.9420


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 5.1104 - Val Accuracy: 0.9386


Training:   0%|          | 0/92 [00:00<?, ?it/s]

Epoch: 10/37 - Loss: 4.9903 - Accuracy: 0.9410


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 5.2877 - Val Accuracy: 0.9384


Training:   0%|          | 0/92 [00:00<?, ?it/s]

Epoch: 11/37 - Loss: 5.0268 - Accuracy: 0.9390


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 5.0966 - Val Accuracy: 0.9395


Training:   0%|          | 0/92 [00:00<?, ?it/s]

Epoch: 12/37 - Loss: 4.8715 - Accuracy: 0.9411


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 5.0056 - Val Accuracy: 0.9329


Training:   0%|          | 0/92 [00:00<?, ?it/s]

Epoch: 13/37 - Loss: 4.5656 - Accuracy: 0.9445


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.5992 - Val Accuracy: 0.9418


Training:   0%|          | 0/92 [00:00<?, ?it/s]

Epoch: 14/37 - Loss: 4.5712 - Accuracy: 0.9451


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.7337 - Val Accuracy: 0.9351


Training:   0%|          | 0/92 [00:00<?, ?it/s]

Epoch: 15/37 - Loss: 4.6850 - Accuracy: 0.9436


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.9828 - Val Accuracy: 0.9385


Training:   0%|          | 0/92 [00:00<?, ?it/s]

Epoch: 16/37 - Loss: 4.4533 - Accuracy: 0.9472


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 5.0353 - Val Accuracy: 0.9341


Training:   0%|          | 0/92 [00:00<?, ?it/s]

Epoch: 17/37 - Loss: 4.5517 - Accuracy: 0.9442


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 5.0547 - Val Accuracy: 0.9384


Training:   0%|          | 0/92 [00:00<?, ?it/s]

Epoch: 18/37 - Loss: 4.4749 - Accuracy: 0.9445


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.7848 - Val Accuracy: 0.9357


Training:   0%|          | 0/92 [00:00<?, ?it/s]

Epoch: 19/37 - Loss: 4.3918 - Accuracy: 0.9474


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.6208 - Val Accuracy: 0.9429


Training:   0%|          | 0/92 [00:00<?, ?it/s]

Epoch: 20/37 - Loss: 4.3855 - Accuracy: 0.9476


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.6436 - Val Accuracy: 0.9423


Training:   0%|          | 0/92 [00:00<?, ?it/s]

Epoch: 21/37 - Loss: 4.3513 - Accuracy: 0.9486


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 6.2915 - Val Accuracy: 0.9169


Training:   0%|          | 0/92 [00:00<?, ?it/s]

Epoch: 22/37 - Loss: 4.2978 - Accuracy: 0.9470


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.6786 - Val Accuracy: 0.9385


Training:   0%|          | 0/92 [00:00<?, ?it/s]

Epoch: 23/37 - Loss: 4.4645 - Accuracy: 0.9442


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 6.3846 - Val Accuracy: 0.9197


Training:   0%|          | 0/92 [00:00<?, ?it/s]

Epoch: 24/37 - Loss: 4.3124 - Accuracy: 0.9465


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.2778 - Val Accuracy: 0.9467


Training:   0%|          | 0/92 [00:00<?, ?it/s]

Epoch: 25/37 - Loss: 4.2080 - Accuracy: 0.9485


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.7310 - Val Accuracy: 0.9351


Training:   0%|          | 0/92 [00:00<?, ?it/s]

Epoch: 26/37 - Loss: 4.3005 - Accuracy: 0.9478


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 5.1641 - Val Accuracy: 0.9340


Training:   0%|          | 0/92 [00:00<?, ?it/s]

Epoch: 27/37 - Loss: 4.2476 - Accuracy: 0.9470


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.5839 - Val Accuracy: 0.9446


Training:   0%|          | 0/92 [00:00<?, ?it/s]

Epoch: 28/37 - Loss: 4.1467 - Accuracy: 0.9493


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.7669 - Val Accuracy: 0.9395


Training:   0%|          | 0/92 [00:00<?, ?it/s]

Epoch: 29/37 - Loss: 4.2127 - Accuracy: 0.9469


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.6213 - Val Accuracy: 0.9385


Training:   0%|          | 0/92 [00:00<?, ?it/s]

Epoch: 30/37 - Loss: 4.2690 - Accuracy: 0.9467


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 5.9114 - Val Accuracy: 0.9291


Training:   0%|          | 0/92 [00:00<?, ?it/s]

Epoch: 31/37 - Loss: 4.1415 - Accuracy: 0.9475


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 5.7471 - Val Accuracy: 0.9269


Training:   0%|          | 0/92 [00:00<?, ?it/s]

Epoch: 32/37 - Loss: 4.1056 - Accuracy: 0.9468


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.4360 - Val Accuracy: 0.9390


Training:   0%|          | 0/92 [00:00<?, ?it/s]

Epoch: 33/37 - Loss: 4.1745 - Accuracy: 0.9468


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.7610 - Val Accuracy: 0.9390


Training:   0%|          | 0/92 [00:00<?, ?it/s]

Epoch: 34/37 - Loss: 4.0546 - Accuracy: 0.9501


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.5031 - Val Accuracy: 0.9396


Training:   0%|          | 0/92 [00:00<?, ?it/s]

Epoch: 35/37 - Loss: 4.0311 - Accuracy: 0.9485


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.6404 - Val Accuracy: 0.9412


Training:   0%|          | 0/92 [00:00<?, ?it/s]

Epoch: 36/37 - Loss: 4.2416 - Accuracy: 0.9461


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.3968 - Val Accuracy: 0.9428


Training:   0%|          | 0/92 [00:00<?, ?it/s]

Epoch: 37/37 - Loss: 4.0886 - Accuracy: 0.9494


  0%|          | 0/12 [00:00<?, ?it/s]

[32m[I 2023-12-15 05:31:54,754][0m Trial 12 finished with value: 0.9406092166900635 and parameters: {'loss_learning_rate': 0.009290401460456234, 'learning_rate': 0.0002884843017216499, 'weight_decay': 0.002140692449457409, 'epsilon': 5.233123983056263e-08, 'beta': 1.0, 'gamma': 0.1, 'theta': 1.0, 'batch_size': 152, 'epochs': 37}. Best is trial 11 with value: 0.9456751346588135.[0m


Val Loss: 4.5628 - Val Accuracy: 0.9406
Learning rate for Loss: 0.009525024246916581
Learning rate: 0.0009759915409138405
Weight decay: 0.0021848607175665676
Epsilon: 4.079762010097853e-08
Beta: 1.0
Gamma: 0.1
Theta: 1.0
Batch size: 158
Number of epochs: 41


Epochs:   0%|          | 0/41 [00:00<?, ?it/s]

Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 1/41 - Loss: 12.3199 - Accuracy: 0.8690


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 8.1628 - Val Accuracy: 0.9204


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 2/41 - Loss: 7.2865 - Accuracy: 0.9172


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 6.3029 - Val Accuracy: 0.9227


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 3/41 - Loss: 6.7059 - Accuracy: 0.9242


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 6.9034 - Val Accuracy: 0.9272


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 4/41 - Loss: 5.7871 - Accuracy: 0.9349


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 5.3060 - Val Accuracy: 0.9425


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 5/41 - Loss: 6.1795 - Accuracy: 0.9273


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 7.0834 - Val Accuracy: 0.9037


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 6/41 - Loss: 6.0390 - Accuracy: 0.9310


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 6.4125 - Val Accuracy: 0.9172


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 7/41 - Loss: 5.3692 - Accuracy: 0.9370


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.8150 - Val Accuracy: 0.9378


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 8/41 - Loss: 5.2621 - Accuracy: 0.9419


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 5.2487 - Val Accuracy: 0.9351


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 9/41 - Loss: 5.2063 - Accuracy: 0.9395


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.7942 - Val Accuracy: 0.9441


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 10/41 - Loss: 6.3098 - Accuracy: 0.9233


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 6.0464 - Val Accuracy: 0.9388


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 11/41 - Loss: 5.2208 - Accuracy: 0.9408


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.8805 - Val Accuracy: 0.9420


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 12/41 - Loss: 5.2071 - Accuracy: 0.9382


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 5.2223 - Val Accuracy: 0.9383


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 13/41 - Loss: 5.5491 - Accuracy: 0.9355


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 5.3194 - Val Accuracy: 0.9383


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 14/41 - Loss: 5.3581 - Accuracy: 0.9384


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.8128 - Val Accuracy: 0.9488


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 15/41 - Loss: 5.0263 - Accuracy: 0.9411


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.6830 - Val Accuracy: 0.9436


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 16/41 - Loss: 5.0081 - Accuracy: 0.9416


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.9488 - Val Accuracy: 0.9399


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 17/41 - Loss: 5.1257 - Accuracy: 0.9391


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 5.7084 - Val Accuracy: 0.9351


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 18/41 - Loss: 5.0679 - Accuracy: 0.9404


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.8170 - Val Accuracy: 0.9441


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 19/41 - Loss: 5.0721 - Accuracy: 0.9391


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 5.1008 - Val Accuracy: 0.9446


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 20/41 - Loss: 4.9400 - Accuracy: 0.9430


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.5679 - Val Accuracy: 0.9478


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 21/41 - Loss: 5.3590 - Accuracy: 0.9353


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 5.4340 - Val Accuracy: 0.9378


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 22/41 - Loss: 5.3978 - Accuracy: 0.9368


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 5.7729 - Val Accuracy: 0.9293


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 23/41 - Loss: 4.8087 - Accuracy: 0.9453


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.4754 - Val Accuracy: 0.9462


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 24/41 - Loss: 4.9228 - Accuracy: 0.9444


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.6992 - Val Accuracy: 0.9409


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 25/41 - Loss: 5.6973 - Accuracy: 0.9323


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 5.7033 - Val Accuracy: 0.9357


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 26/41 - Loss: 4.9054 - Accuracy: 0.9461


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 5.6741 - Val Accuracy: 0.9341


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 27/41 - Loss: 4.9294 - Accuracy: 0.9436


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.5406 - Val Accuracy: 0.9467


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 28/41 - Loss: 4.6572 - Accuracy: 0.9465


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 7.1408 - Val Accuracy: 0.9098


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 29/41 - Loss: 5.1798 - Accuracy: 0.9382


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.7650 - Val Accuracy: 0.9473


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 30/41 - Loss: 5.2889 - Accuracy: 0.9384


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 5.0800 - Val Accuracy: 0.9393


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 31/41 - Loss: 5.0040 - Accuracy: 0.9426


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.9508 - Val Accuracy: 0.9388


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 32/41 - Loss: 4.8047 - Accuracy: 0.9460


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.7047 - Val Accuracy: 0.9425


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 33/41 - Loss: 4.8764 - Accuracy: 0.9450


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.7500 - Val Accuracy: 0.9462


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 34/41 - Loss: 4.9746 - Accuracy: 0.9428


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.8716 - Val Accuracy: 0.9430


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 35/41 - Loss: 4.7203 - Accuracy: 0.9469


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 5.6287 - Val Accuracy: 0.9367


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 36/41 - Loss: 4.8176 - Accuracy: 0.9467


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.8950 - Val Accuracy: 0.9441


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 37/41 - Loss: 4.6797 - Accuracy: 0.9459


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 4.9464 - Val Accuracy: 0.9420


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 38/41 - Loss: 4.9287 - Accuracy: 0.9434


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 5.4925 - Val Accuracy: 0.9367


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 39/41 - Loss: 5.0233 - Accuracy: 0.9431


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 5.0409 - Val Accuracy: 0.9404


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 40/41 - Loss: 5.0572 - Accuracy: 0.9422


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 6.1308 - Val Accuracy: 0.9341


Training:   0%|          | 0/89 [00:00<?, ?it/s]

Epoch: 41/41 - Loss: 4.8014 - Accuracy: 0.9446


  0%|          | 0/12 [00:00<?, ?it/s]

[32m[I 2023-12-15 05:52:53,723][0m Trial 13 finished with value: 0.9372362494468689 and parameters: {'loss_learning_rate': 0.009525024246916581, 'learning_rate': 0.0009759915409138405, 'weight_decay': 0.0021848607175665676, 'epsilon': 4.079762010097853e-08, 'beta': 1.0, 'gamma': 0.1, 'theta': 1.0, 'batch_size': 158, 'epochs': 41}. Best is trial 11 with value: 0.9456751346588135.[0m


Val Loss: 5.2764 - Val Accuracy: 0.9372
Learning rate for Loss: 0.009723916360876673
Learning rate: 0.0006761963273093207
Weight decay: 0.00450317699753798
Epsilon: 4.6147791934844224e-08
Beta: 0.8
Gamma: 0.1
Theta: 1.0
Batch size: 72
Number of epochs: 43


Epochs:   0%|          | 0/43 [00:00<?, ?it/s]

Training:   0%|          | 0/195 [00:00<?, ?it/s]

Epoch: 1/43 - Loss: 11.3304 - Accuracy: 0.8581


  0%|          | 0/25 [00:00<?, ?it/s]

Val Loss: 6.7333 - Val Accuracy: 0.9206


Training:   0%|          | 0/195 [00:00<?, ?it/s]

Epoch: 2/43 - Loss: 6.6567 - Accuracy: 0.9222


  0%|          | 0/25 [00:00<?, ?it/s]

Val Loss: 7.2563 - Val Accuracy: 0.9083


Training:   0%|          | 0/195 [00:00<?, ?it/s]

Epoch: 3/43 - Loss: 6.3499 - Accuracy: 0.9249


  0%|          | 0/25 [00:00<?, ?it/s]

Val Loss: 7.3260 - Val Accuracy: 0.9013


Training:   0%|          | 0/195 [00:00<?, ?it/s]

Epoch: 4/43 - Loss: 6.1049 - Accuracy: 0.9273


  0%|          | 0/25 [00:00<?, ?it/s]

[32m[I 2023-12-15 05:54:58,221][0m Trial 14 pruned. [0m


Val Loss: 8.0783 - Val Accuracy: 0.8895
Learning rate for Loss: 0.0013877451119184616
Learning rate: 0.00015657262193007139
Weight decay: 0.0015453901059385518
Epsilon: 4.297091262434528e-08
Beta: 0.8
Gamma: 0.1
Theta: 0.9
Batch size: 248
Number of epochs: 40


Epochs:   0%|          | 0/40 [00:00<?, ?it/s]

Training:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch: 1/40 - Loss: 21.8359 - Accuracy: 0.6737


  0%|          | 0/8 [00:00<?, ?it/s]

[32m[I 2023-12-15 05:55:30,096][0m Trial 15 pruned. [0m


Val Loss: 13.1622 - Val Accuracy: 0.8420
Learning rate for Loss: 0.00497628852390401
Learning rate: 0.002218233772678464
Weight decay: 0.005552318576352257
Epsilon: 5.598361898360538e-08
Beta: 1.0
Gamma: 0.30000000000000004
Theta: 1.0
Batch size: 149
Number of epochs: 26


Epochs:   0%|          | 0/26 [00:00<?, ?it/s]

Training:   0%|          | 0/94 [00:00<?, ?it/s]

Epoch: 1/26 - Loss: 15.0314 - Accuracy: 0.8673


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 9.2671 - Val Accuracy: 0.9197


Training:   0%|          | 0/94 [00:00<?, ?it/s]

Epoch: 2/26 - Loss: 9.7963 - Accuracy: 0.9131


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 8.1071 - Val Accuracy: 0.9309


Training:   0%|          | 0/94 [00:00<?, ?it/s]

Epoch: 3/26 - Loss: 9.8080 - Accuracy: 0.9158


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 7.6196 - Val Accuracy: 0.9371


Training:   0%|          | 0/94 [00:00<?, ?it/s]

Epoch: 4/26 - Loss: 8.4337 - Accuracy: 0.9278


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 7.8390 - Val Accuracy: 0.9221


Training:   0%|          | 0/94 [00:00<?, ?it/s]

Epoch: 5/26 - Loss: 7.9029 - Accuracy: 0.9331


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 8.2271 - Val Accuracy: 0.9294


Training:   0%|          | 0/94 [00:00<?, ?it/s]

Epoch: 6/26 - Loss: 7.9179 - Accuracy: 0.9332


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 7.5721 - Val Accuracy: 0.9302


Training:   0%|          | 0/94 [00:00<?, ?it/s]

Epoch: 7/26 - Loss: 7.8037 - Accuracy: 0.9337


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 8.4954 - Val Accuracy: 0.9273


Training:   0%|          | 0/94 [00:00<?, ?it/s]

Epoch: 8/26 - Loss: 7.9562 - Accuracy: 0.9328


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 7.4438 - Val Accuracy: 0.9371


Training:   0%|          | 0/94 [00:00<?, ?it/s]

Epoch: 9/26 - Loss: 7.6976 - Accuracy: 0.9352


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 7.0761 - Val Accuracy: 0.9404


Training:   0%|          | 0/94 [00:00<?, ?it/s]

Epoch: 10/26 - Loss: 7.4872 - Accuracy: 0.9383


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 7.4421 - Val Accuracy: 0.9432


Training:   0%|          | 0/94 [00:00<?, ?it/s]

Epoch: 11/26 - Loss: 7.2473 - Accuracy: 0.9373


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 7.4370 - Val Accuracy: 0.9341


Training:   0%|          | 0/94 [00:00<?, ?it/s]

Epoch: 12/26 - Loss: 8.0142 - Accuracy: 0.9357


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 7.7472 - Val Accuracy: 0.9381


Training:   0%|          | 0/94 [00:00<?, ?it/s]

Epoch: 13/26 - Loss: 7.7000 - Accuracy: 0.9366


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 8.0896 - Val Accuracy: 0.9227


Training:   0%|          | 0/94 [00:00<?, ?it/s]

Epoch: 14/26 - Loss: 7.4361 - Accuracy: 0.9387


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 7.3652 - Val Accuracy: 0.9396


Training:   0%|          | 0/94 [00:00<?, ?it/s]

Epoch: 15/26 - Loss: 7.1970 - Accuracy: 0.9404


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 7.7836 - Val Accuracy: 0.9405


Training:   0%|          | 0/94 [00:00<?, ?it/s]

Epoch: 16/26 - Loss: 7.5264 - Accuracy: 0.9416


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 7.8437 - Val Accuracy: 0.9320


Training:   0%|          | 0/94 [00:00<?, ?it/s]

Epoch: 17/26 - Loss: 7.4356 - Accuracy: 0.9392


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 7.5917 - Val Accuracy: 0.9370


Training:   0%|          | 0/94 [00:00<?, ?it/s]

Epoch: 18/26 - Loss: 7.2916 - Accuracy: 0.9408


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 7.8217 - Val Accuracy: 0.9310


Training:   0%|          | 0/94 [00:00<?, ?it/s]

Epoch: 19/26 - Loss: 7.4675 - Accuracy: 0.9447


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 8.5530 - Val Accuracy: 0.9300


Training:   0%|          | 0/94 [00:00<?, ?it/s]

Epoch: 20/26 - Loss: 7.6089 - Accuracy: 0.9379


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 8.6926 - Val Accuracy: 0.9209


Training:   0%|          | 0/94 [00:00<?, ?it/s]

Epoch: 21/26 - Loss: 7.6911 - Accuracy: 0.9380


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 7.8184 - Val Accuracy: 0.9418


Training:   0%|          | 0/94 [00:00<?, ?it/s]

Epoch: 22/26 - Loss: 7.4561 - Accuracy: 0.9394


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 7.6105 - Val Accuracy: 0.9416


Training:   0%|          | 0/94 [00:00<?, ?it/s]

Epoch: 23/26 - Loss: 7.5642 - Accuracy: 0.9390


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 7.4404 - Val Accuracy: 0.9463


Training:   0%|          | 0/94 [00:00<?, ?it/s]

Epoch: 24/26 - Loss: 7.2852 - Accuracy: 0.9426


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 7.5032 - Val Accuracy: 0.9397


Training:   0%|          | 0/94 [00:00<?, ?it/s]

Epoch: 25/26 - Loss: 7.5745 - Accuracy: 0.9419


  0%|          | 0/12 [00:00<?, ?it/s]

Val Loss: 7.8406 - Val Accuracy: 0.9441


Training:   0%|          | 0/94 [00:00<?, ?it/s]

Epoch: 26/26 - Loss: 7.8746 - Accuracy: 0.9366


  0%|          | 0/12 [00:00<?, ?it/s]

[32m[I 2023-12-15 06:08:43,980][0m Trial 16 finished with value: 0.9351071715354919 and parameters: {'loss_learning_rate': 0.00497628852390401, 'learning_rate': 0.002218233772678464, 'weight_decay': 0.005552318576352257, 'epsilon': 5.598361898360538e-08, 'beta': 1.0, 'gamma': 0.30000000000000004, 'theta': 1.0, 'batch_size': 149, 'epochs': 26}. Best is trial 11 with value: 0.9456751346588135.[0m


Val Loss: 7.2861 - Val Accuracy: 0.9351
Learning rate for Loss: 0.00480929415442556
Learning rate: 0.0003468961902933504
Weight decay: 0.0010980053736140993
Epsilon: 9.801269489115104e-08
Beta: 0.8
Gamma: 0.2
Theta: 0.9
Batch size: 91
Number of epochs: 49


Epochs:   0%|          | 0/49 [00:00<?, ?it/s]

Training:   0%|          | 0/154 [00:00<?, ?it/s]

Epoch: 1/49 - Loss: 14.0010 - Accuracy: 0.8272


  0%|          | 0/20 [00:00<?, ?it/s]

Val Loss: 6.9893 - Val Accuracy: 0.9205


Training:   0%|          | 0/154 [00:00<?, ?it/s]

Epoch: 2/49 - Loss: 6.9903 - Accuracy: 0.9185


  0%|          | 0/20 [00:00<?, ?it/s]

Val Loss: 5.8491 - Val Accuracy: 0.9273


Training:   0%|          | 0/154 [00:00<?, ?it/s]

Epoch: 3/49 - Loss: 6.0206 - Accuracy: 0.9310


  0%|          | 0/20 [00:00<?, ?it/s]

Val Loss: 5.8901 - Val Accuracy: 0.9240


Training:   0%|          | 0/154 [00:00<?, ?it/s]

Epoch: 4/49 - Loss: 5.5481 - Accuracy: 0.9354


  0%|          | 0/20 [00:00<?, ?it/s]

Val Loss: 5.5724 - Val Accuracy: 0.9273


Training:   0%|          | 0/154 [00:00<?, ?it/s]

Epoch: 5/49 - Loss: 5.4563 - Accuracy: 0.9351


  0%|          | 0/20 [00:00<?, ?it/s]

Val Loss: 5.4072 - Val Accuracy: 0.9313


Training:   0%|          | 0/154 [00:00<?, ?it/s]

Epoch: 6/49 - Loss: 5.6505 - Accuracy: 0.9333


  0%|          | 0/20 [00:00<?, ?it/s]

Val Loss: 5.3500 - Val Accuracy: 0.9363


Training:   0%|          | 0/154 [00:00<?, ?it/s]

Epoch: 7/49 - Loss: 5.0560 - Accuracy: 0.9400


  0%|          | 0/20 [00:00<?, ?it/s]

Val Loss: 5.2978 - Val Accuracy: 0.9357


Training:   0%|          | 0/154 [00:00<?, ?it/s]

Epoch: 8/49 - Loss: 4.9947 - Accuracy: 0.9401


  0%|          | 0/20 [00:00<?, ?it/s]

Val Loss: 6.7257 - Val Accuracy: 0.9159


Training:   0%|          | 0/154 [00:00<?, ?it/s]

Epoch: 9/49 - Loss: 5.1185 - Accuracy: 0.9379


  0%|          | 0/20 [00:00<?, ?it/s]

Val Loss: 5.7129 - Val Accuracy: 0.9298


Training:   0%|          | 0/154 [00:00<?, ?it/s]

Epoch: 10/49 - Loss: 4.9784 - Accuracy: 0.9407


  0%|          | 0/20 [00:00<?, ?it/s]

Val Loss: 5.1034 - Val Accuracy: 0.9379


Training:   0%|          | 0/154 [00:00<?, ?it/s]

Epoch: 11/49 - Loss: 4.9882 - Accuracy: 0.9409


  0%|          | 0/20 [00:00<?, ?it/s]

Val Loss: 4.8213 - Val Accuracy: 0.9379


Training:   0%|          | 0/154 [00:00<?, ?it/s]

Epoch: 12/49 - Loss: 4.6603 - Accuracy: 0.9456


  0%|          | 0/20 [00:00<?, ?it/s]

Val Loss: 4.7479 - Val Accuracy: 0.9440


Training:   0%|          | 0/154 [00:00<?, ?it/s]

Epoch: 13/49 - Loss: 5.1188 - Accuracy: 0.9396


  0%|          | 0/20 [00:00<?, ?it/s]

Val Loss: 5.0407 - Val Accuracy: 0.9434


Training:   0%|          | 0/154 [00:00<?, ?it/s]

Epoch: 14/49 - Loss: 4.7136 - Accuracy: 0.9426


  0%|          | 0/20 [00:00<?, ?it/s]

Val Loss: 4.8372 - Val Accuracy: 0.9359


Training:   0%|          | 0/154 [00:00<?, ?it/s]

Epoch: 15/49 - Loss: 4.7092 - Accuracy: 0.9437


  0%|          | 0/20 [00:00<?, ?it/s]

Val Loss: 4.8644 - Val Accuracy: 0.9440


Training:   0%|          | 0/154 [00:00<?, ?it/s]

Epoch: 16/49 - Loss: 4.6935 - Accuracy: 0.9430


  0%|          | 0/20 [00:00<?, ?it/s]

Val Loss: 5.2546 - Val Accuracy: 0.9330


Training:   0%|          | 0/154 [00:00<?, ?it/s]

Epoch: 17/49 - Loss: 4.6831 - Accuracy: 0.9431


  0%|          | 0/20 [00:00<?, ?it/s]

Val Loss: 4.6615 - Val Accuracy: 0.9445


Training:   0%|          | 0/154 [00:00<?, ?it/s]

Epoch: 18/49 - Loss: 4.8231 - Accuracy: 0.9412


  0%|          | 0/20 [00:00<?, ?it/s]

Val Loss: 4.9045 - Val Accuracy: 0.9397


Training:   0%|          | 0/154 [00:00<?, ?it/s]

Epoch: 19/49 - Loss: 4.6052 - Accuracy: 0.9433


  0%|          | 0/20 [00:00<?, ?it/s]

Val Loss: 4.6706 - Val Accuracy: 0.9452


Training:   0%|          | 0/154 [00:00<?, ?it/s]

Epoch: 20/49 - Loss: 4.6293 - Accuracy: 0.9437


  0%|          | 0/20 [00:00<?, ?it/s]

Val Loss: 7.0270 - Val Accuracy: 0.9123


Training:   0%|          | 0/154 [00:00<?, ?it/s]

Epoch: 21/49 - Loss: 4.7306 - Accuracy: 0.9409


  0%|          | 0/20 [00:00<?, ?it/s]

Val Loss: 5.2854 - Val Accuracy: 0.9363


Training:   0%|          | 0/154 [00:00<?, ?it/s]

Epoch: 22/49 - Loss: 4.4989 - Accuracy: 0.9450


  0%|          | 0/20 [00:00<?, ?it/s]

Val Loss: 5.1584 - Val Accuracy: 0.9346


Training:   0%|          | 0/154 [00:00<?, ?it/s]

Epoch: 23/49 - Loss: 4.2827 - Accuracy: 0.9484


  0%|          | 0/20 [00:00<?, ?it/s]

Val Loss: 4.5766 - Val Accuracy: 0.9451


Training:   0%|          | 0/154 [00:00<?, ?it/s]

Epoch: 24/49 - Loss: 4.3444 - Accuracy: 0.9469


  0%|          | 0/20 [00:00<?, ?it/s]

Val Loss: 5.5927 - Val Accuracy: 0.9282


Training:   0%|          | 0/154 [00:00<?, ?it/s]

Epoch: 25/49 - Loss: 4.4034 - Accuracy: 0.9476


  0%|          | 0/20 [00:00<?, ?it/s]

Val Loss: 4.6178 - Val Accuracy: 0.9429


Training:   0%|          | 0/154 [00:00<?, ?it/s]

Epoch: 26/49 - Loss: 4.8048 - Accuracy: 0.9397


  0%|          | 0/20 [00:00<?, ?it/s]

Val Loss: 4.7020 - Val Accuracy: 0.9419


Training:   0%|          | 0/154 [00:00<?, ?it/s]

Epoch: 27/49 - Loss: 4.4751 - Accuracy: 0.9453


  0%|          | 0/20 [00:00<?, ?it/s]

Val Loss: 4.5800 - Val Accuracy: 0.9434


Training:   0%|          | 0/154 [00:00<?, ?it/s]

Epoch: 28/49 - Loss: 4.3111 - Accuracy: 0.9485


  0%|          | 0/20 [00:00<?, ?it/s]

Val Loss: 4.6839 - Val Accuracy: 0.9396


Training:   0%|          | 0/154 [00:00<?, ?it/s]

Epoch: 29/49 - Loss: 4.4290 - Accuracy: 0.9452


  0%|          | 0/20 [00:00<?, ?it/s]

Val Loss: 4.6447 - Val Accuracy: 0.9434


Training:   0%|          | 0/154 [00:00<?, ?it/s]

Epoch: 30/49 - Loss: 4.3960 - Accuracy: 0.9449


  0%|          | 0/20 [00:00<?, ?it/s]

Val Loss: 4.3873 - Val Accuracy: 0.9451


Training:   0%|          | 0/154 [00:00<?, ?it/s]

Epoch: 31/49 - Loss: 4.6561 - Accuracy: 0.9433


  0%|          | 0/20 [00:00<?, ?it/s]

Val Loss: 4.8915 - Val Accuracy: 0.9429


Training:   0%|          | 0/154 [00:00<?, ?it/s]

Epoch: 32/49 - Loss: 4.2802 - Accuracy: 0.9467


  0%|          | 0/20 [00:00<?, ?it/s]

Val Loss: 4.6277 - Val Accuracy: 0.9445


Training:   0%|          | 0/154 [00:00<?, ?it/s]

Epoch: 33/49 - Loss: 4.5004 - Accuracy: 0.9432


  0%|          | 0/20 [00:00<?, ?it/s]

Val Loss: 4.4409 - Val Accuracy: 0.9440


Training:   0%|          | 0/154 [00:00<?, ?it/s]

Epoch: 34/49 - Loss: 4.2840 - Accuracy: 0.9470


  0%|          | 0/20 [00:00<?, ?it/s]

Val Loss: 5.8067 - Val Accuracy: 0.9243


Training:   0%|          | 0/154 [00:00<?, ?it/s]

Epoch: 35/49 - Loss: 4.5197 - Accuracy: 0.9441


  0%|          | 0/20 [00:00<?, ?it/s]

Val Loss: 4.6560 - Val Accuracy: 0.9381


Training:   0%|          | 0/154 [00:00<?, ?it/s]

Epoch: 36/49 - Loss: 4.2505 - Accuracy: 0.9488


  0%|          | 0/20 [00:00<?, ?it/s]

Val Loss: 4.5131 - Val Accuracy: 0.9429


Training:   0%|          | 0/154 [00:00<?, ?it/s]

Epoch: 37/49 - Loss: 4.4646 - Accuracy: 0.9444


  0%|          | 0/20 [00:00<?, ?it/s]

Val Loss: 5.0969 - Val Accuracy: 0.9396


Training:   0%|          | 0/154 [00:00<?, ?it/s]

Epoch: 38/49 - Loss: 4.3416 - Accuracy: 0.9475


  0%|          | 0/20 [00:00<?, ?it/s]

Val Loss: 4.8573 - Val Accuracy: 0.9423


Training:   0%|          | 0/154 [00:00<?, ?it/s]

Epoch: 39/49 - Loss: 4.4383 - Accuracy: 0.9463


  0%|          | 0/20 [00:00<?, ?it/s]

Val Loss: 4.5259 - Val Accuracy: 0.9423


Training:   0%|          | 0/154 [00:00<?, ?it/s]

Epoch: 40/49 - Loss: 4.2732 - Accuracy: 0.9473


  0%|          | 0/20 [00:00<?, ?it/s]

Val Loss: 4.6583 - Val Accuracy: 0.9403


Training:   0%|          | 0/154 [00:00<?, ?it/s]

Epoch: 41/49 - Loss: 4.0577 - Accuracy: 0.9503


  0%|          | 0/20 [00:00<?, ?it/s]

Val Loss: 4.8276 - Val Accuracy: 0.9462


Training:   0%|          | 0/154 [00:00<?, ?it/s]

Epoch: 42/49 - Loss: 4.1131 - Accuracy: 0.9497


  0%|          | 0/20 [00:00<?, ?it/s]

Val Loss: 4.8201 - Val Accuracy: 0.9375


Training:   0%|          | 0/154 [00:00<?, ?it/s]

Epoch: 43/49 - Loss: 4.2300 - Accuracy: 0.9477


  0%|          | 0/20 [00:00<?, ?it/s]

Val Loss: 4.9360 - Val Accuracy: 0.9386


Training:   0%|          | 0/154 [00:00<?, ?it/s]

Epoch: 44/49 - Loss: 4.3879 - Accuracy: 0.9466


  0%|          | 0/20 [00:00<?, ?it/s]

Val Loss: 4.7841 - Val Accuracy: 0.9353


Training:   0%|          | 0/154 [00:00<?, ?it/s]

Epoch: 45/49 - Loss: 4.5010 - Accuracy: 0.9446


  0%|          | 0/20 [00:00<?, ?it/s]

Val Loss: 5.2908 - Val Accuracy: 0.9374


Training:   0%|          | 0/154 [00:00<?, ?it/s]

Epoch: 46/49 - Loss: 4.2850 - Accuracy: 0.9488


  0%|          | 0/20 [00:00<?, ?it/s]

Val Loss: 4.4015 - Val Accuracy: 0.9445


Training:   0%|          | 0/154 [00:00<?, ?it/s]

Epoch: 47/49 - Loss: 4.3010 - Accuracy: 0.9477


  0%|          | 0/20 [00:00<?, ?it/s]

Val Loss: 4.6340 - Val Accuracy: 0.9408


Training:   0%|          | 0/154 [00:00<?, ?it/s]

Epoch: 48/49 - Loss: 4.2296 - Accuracy: 0.9486


  0%|          | 0/20 [00:00<?, ?it/s]

Val Loss: 6.9534 - Val Accuracy: 0.9161


Training:   0%|          | 0/154 [00:00<?, ?it/s]

Epoch: 49/49 - Loss: 4.2592 - Accuracy: 0.9477


  0%|          | 0/20 [00:00<?, ?it/s]

[32m[I 2023-12-15 06:33:45,315][0m Trial 17 finished with value: 0.9397320747375488 and parameters: {'loss_learning_rate': 0.00480929415442556, 'learning_rate': 0.0003468961902933504, 'weight_decay': 0.0010980053736140993, 'epsilon': 9.801269489115104e-08, 'beta': 0.8, 'gamma': 0.2, 'theta': 0.9, 'batch_size': 91, 'epochs': 49}. Best is trial 11 with value: 0.9456751346588135.[0m


Val Loss: 4.6721 - Val Accuracy: 0.9397
Learning rate for Loss: 0.006791808821536431
Learning rate: 7.112624965761007e-05
Weight decay: 0.003261350348337336
Epsilon: 2.9126736387723937e-08
Beta: 1.0
Gamma: 0.4
Theta: 0.9
Batch size: 133
Number of epochs: 32


Epochs:   0%|          | 0/32 [00:00<?, ?it/s]

Training:   0%|          | 0/106 [00:00<?, ?it/s]

Epoch: 1/32 - Loss: 30.6461 - Accuracy: 0.6353


  0%|          | 0/14 [00:00<?, ?it/s]

[32m[I 2023-12-15 06:34:18,890][0m Trial 18 pruned. [0m


Val Loss: 19.2589 - Val Accuracy: 0.7943
Learning rate for Loss: 0.0031693779058061867
Learning rate: 0.00224307293898659
Weight decay: 0.0065306793127964885
Epsilon: 2.93993631940951e-08
Beta: 0.9
Gamma: 0.2
Theta: 0.1
Batch size: 166
Number of epochs: 34


Epochs:   0%|          | 0/34 [00:00<?, ?it/s]

Training:   0%|          | 0/85 [00:00<?, ?it/s]

Epoch: 1/34 - Loss: 7.2308 - Accuracy: 0.8445


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 4.6947 - Val Accuracy: 0.9158


Training:   0%|          | 0/85 [00:00<?, ?it/s]

Epoch: 2/34 - Loss: 4.3084 - Accuracy: 0.9141


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 3.6340 - Val Accuracy: 0.9306


Training:   0%|          | 0/85 [00:00<?, ?it/s]

Epoch: 3/34 - Loss: 3.9042 - Accuracy: 0.9205


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 3.3002 - Val Accuracy: 0.9339


Training:   0%|          | 0/85 [00:00<?, ?it/s]

Epoch: 4/34 - Loss: 3.7028 - Accuracy: 0.9261


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 3.3892 - Val Accuracy: 0.9327


Training:   0%|          | 0/85 [00:00<?, ?it/s]

Epoch: 5/34 - Loss: 3.6023 - Accuracy: 0.9222


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 3.4653 - Val Accuracy: 0.9360


Training:   0%|          | 0/85 [00:00<?, ?it/s]

Epoch: 6/34 - Loss: 3.6625 - Accuracy: 0.9221


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 3.8183 - Val Accuracy: 0.9159


Training:   0%|          | 0/85 [00:00<?, ?it/s]

Epoch: 7/34 - Loss: 3.6307 - Accuracy: 0.9233


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 4.5974 - Val Accuracy: 0.8623


Training:   0%|          | 0/85 [00:00<?, ?it/s]

Epoch: 8/34 - Loss: 3.6713 - Accuracy: 0.9276


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 3.6617 - Val Accuracy: 0.9322


Training:   0%|          | 0/85 [00:00<?, ?it/s]

Epoch: 9/34 - Loss: 3.5046 - Accuracy: 0.9290


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 3.3530 - Val Accuracy: 0.9262


Training:   0%|          | 0/85 [00:00<?, ?it/s]

Epoch: 10/34 - Loss: 3.4060 - Accuracy: 0.9313


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 3.8153 - Val Accuracy: 0.9290


Training:   0%|          | 0/85 [00:00<?, ?it/s]

Epoch: 11/34 - Loss: 3.4238 - Accuracy: 0.9352


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 3.6110 - Val Accuracy: 0.9279


Training:   0%|          | 0/85 [00:00<?, ?it/s]

Epoch: 12/34 - Loss: 3.4723 - Accuracy: 0.9323


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 3.3707 - Val Accuracy: 0.9327


Training:   0%|          | 0/85 [00:00<?, ?it/s]

Epoch: 13/34 - Loss: 3.3002 - Accuracy: 0.9358


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 3.0838 - Val Accuracy: 0.9393


Training:   0%|          | 0/85 [00:00<?, ?it/s]

Epoch: 14/34 - Loss: 3.2819 - Accuracy: 0.9353


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 3.6384 - Val Accuracy: 0.9387


Training:   0%|          | 0/85 [00:00<?, ?it/s]

Epoch: 15/34 - Loss: 3.3572 - Accuracy: 0.9333


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 3.8073 - Val Accuracy: 0.9300


Training:   0%|          | 0/85 [00:00<?, ?it/s]

Epoch: 16/34 - Loss: 3.2616 - Accuracy: 0.9399


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 3.0115 - Val Accuracy: 0.9453


Training:   0%|          | 0/85 [00:00<?, ?it/s]

Epoch: 17/34 - Loss: 3.2043 - Accuracy: 0.9350


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 3.1641 - Val Accuracy: 0.9393


Training:   0%|          | 0/85 [00:00<?, ?it/s]

Epoch: 18/34 - Loss: 3.3101 - Accuracy: 0.9352


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 3.6572 - Val Accuracy: 0.9371


Training:   0%|          | 0/85 [00:00<?, ?it/s]

Epoch: 19/34 - Loss: 3.7226 - Accuracy: 0.9347


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 3.3272 - Val Accuracy: 0.9382


Training:   0%|          | 0/85 [00:00<?, ?it/s]

Epoch: 20/34 - Loss: 3.4451 - Accuracy: 0.9389


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 3.6251 - Val Accuracy: 0.9339


Training:   0%|          | 0/85 [00:00<?, ?it/s]

Epoch: 21/34 - Loss: 3.5041 - Accuracy: 0.9345


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 4.0170 - Val Accuracy: 0.9060


Training:   0%|          | 0/85 [00:00<?, ?it/s]

Epoch: 22/34 - Loss: 3.5669 - Accuracy: 0.9382


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 4.2097 - Val Accuracy: 0.9387


Training:   0%|          | 0/85 [00:00<?, ?it/s]

Epoch: 23/34 - Loss: 3.6420 - Accuracy: 0.9364


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 3.5318 - Val Accuracy: 0.9436


Training:   0%|          | 0/85 [00:00<?, ?it/s]

Epoch: 24/34 - Loss: 3.5233 - Accuracy: 0.9395


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 3.8851 - Val Accuracy: 0.9301


Training:   0%|          | 0/85 [00:00<?, ?it/s]

Epoch: 25/34 - Loss: 3.5992 - Accuracy: 0.9337


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 3.3506 - Val Accuracy: 0.9229


Training:   0%|          | 0/85 [00:00<?, ?it/s]

Epoch: 26/34 - Loss: 3.4160 - Accuracy: 0.9389


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 3.5692 - Val Accuracy: 0.9311


Training:   0%|          | 0/85 [00:00<?, ?it/s]

Epoch: 27/34 - Loss: 3.5203 - Accuracy: 0.9330


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 3.6489 - Val Accuracy: 0.9349


Training:   0%|          | 0/85 [00:00<?, ?it/s]

Epoch: 28/34 - Loss: 3.6203 - Accuracy: 0.9385


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 3.2387 - Val Accuracy: 0.9279


Training:   0%|          | 0/85 [00:00<?, ?it/s]

Epoch: 29/34 - Loss: 3.5640 - Accuracy: 0.9373


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 3.4335 - Val Accuracy: 0.9420


Training:   0%|          | 0/85 [00:00<?, ?it/s]

Epoch: 30/34 - Loss: 3.6220 - Accuracy: 0.9441


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 3.4730 - Val Accuracy: 0.9420


Training:   0%|          | 0/85 [00:00<?, ?it/s]

Epoch: 31/34 - Loss: 3.6681 - Accuracy: 0.9413


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 3.2772 - Val Accuracy: 0.9224


Training:   0%|          | 0/85 [00:00<?, ?it/s]

Epoch: 32/34 - Loss: 3.4799 - Accuracy: 0.9367


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 3.6476 - Val Accuracy: 0.9415


Training:   0%|          | 0/85 [00:00<?, ?it/s]

Epoch: 33/34 - Loss: 3.4917 - Accuracy: 0.9428


  0%|          | 0/11 [00:00<?, ?it/s]

Val Loss: 3.4707 - Val Accuracy: 0.9387


Training:   0%|          | 0/85 [00:00<?, ?it/s]

Epoch: 34/34 - Loss: 3.5694 - Accuracy: 0.9325


  0%|          | 0/11 [00:00<?, ?it/s]

[32m[I 2023-12-15 06:52:13,583][0m Trial 19 finished with value: 0.8770890831947327 and parameters: {'loss_learning_rate': 0.0031693779058061867, 'learning_rate': 0.00224307293898659, 'weight_decay': 0.0065306793127964885, 'epsilon': 2.93993631940951e-08, 'beta': 0.9, 'gamma': 0.2, 'theta': 0.1, 'batch_size': 166, 'epochs': 34}. Best is trial 11 with value: 0.9456751346588135.[0m


Val Loss: 3.9234 - Val Accuracy: 0.8771

Study statistics: 
  Number of finished trials:  20
  Number of pruned trials:  9
  Number of complete trials:  11


In [23]:
print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

Best trial:
  Value:  0.9456751346588135
  Params: 
    batch_size: 158
    beta: 1.0
    epochs: 43
    epsilon: 9.011779972509285e-08
    gamma: 0.1
    learning_rate: 0.00037991345798925633
    loss_learning_rate: 0.008041559116218412
    theta: 1.0
    weight_decay: 0.00967944903244954


In [None]:
# ViT P12-S8 Triplet ArcFace CosFace Mean

Best trial:
Value:  0.9456751346588135
Params: 
batch_size: 158
beta: 1.0
epochs: 43
epsilon: 9.011779972509285e-08
gamma: 0.1
learning_rate: 0.00037991345798925633
loss_learning_rate: 0.008041559116218412
theta: 1.0
weight_decay: 0.00967944903244954