In [1]:
import os
import random
import pandas as pd
import numpy as np
import mxnet as mx
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as opt
from torch.utils.data import Dataset, DataLoader
from einops import rearrange, repeat
import optuna
from optuna.trial import TrialState
from tqdm.notebook import tqdm

In [2]:
def file_to_embed(embeds, file):
    """Collect cached embeddings for a batch of file names into one tensor.

    Args:
        embeds: mapping from file name to a cached embedding; only the first
            entry along dim 0 of each value (``embeds[name][0]``) is used.
        file: iterable of file names, each of which must be a key of ``embeds``.

    Returns:
        The selected vectors stacked along a new leading dimension, in the
        same order as ``file``.
    """
    return torch.stack([embeds[name][0] for name in file])

In [3]:
# Patch-count threshold checked by ViTs_face.__init__, which asserts that the
# number of patches is strictly greater than this value.
MIN_NUM_PATCHES = 16

In [4]:
# Run on CUDA device index 1 when CUDA is available, otherwise fall back to CPU.
# Every later cell places models and tensors on this device.
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=1)

In [5]:
class AdienceDataset(Dataset):
    """Image dataset driven by a header-less CSV of ``(file name, label)`` rows.

    In plain mode (``train=False``) an item is ``(image, label, img_file)``.
    In triplet mode (``train=True``) an item additionally carries a randomly
    drawn positive (same label, different row) and negative (different label):
    ``(image, pos_image, neg_image, label, img_file, positive_img, negative_img)``.

    Args:
        annot_file: path to the CSV; column 0 is the image file name relative
            to ``img_dir`` and column 1 is the label.
        img_dir: directory that contains the image files.
        train: when True, ``__getitem__`` returns triplets.
    """

    def __init__(self, annot_file, img_dir, train=False):
        self.img_lbls = pd.read_csv(annot_file, header=None)
        self.img_dir = img_dir
        self.is_train = train

    def __len__(self):
        """Return the number of annotated images."""
        return len(self.img_lbls)

    def _load_image(self, img_file):
        """Read ``img_file`` from ``img_dir`` and return it as a float CHW tensor."""
        img_path = os.path.join(self.img_dir, img_file)
        image = mx.image.imread(img_path)
        # Only the second axis is checked; resize_short rescales the shorter
        # edge to 112 and keeps the aspect ratio.
        if image.shape[1] != 112:
            image = mx.image.resize_short(image, 112)
        # Move the channel axis first (axes 2,0,1) as expected by the model.
        image = mx.nd.transpose(image, axes=(2, 0, 1))
        return torch.tensor(image.asnumpy()).type(torch.FloatTensor)

    def __getitem__(self, idx):
        """Return the sample at row ``idx`` (see the class docstring for layout).

        Raises:
            IndexError: in triplet mode, when no row has a different label.
        """
        img_file = self.img_lbls.iloc[idx, 0]
        image = self._load_image(img_file)
        label = self.img_lbls.iloc[idx, 1]

        if not self.is_train:
            return image, label, img_file

        # Work with row *positions* throughout, because the picks below are
        # fed back into .iloc (positional indexing).
        all_labels = self.img_lbls.iloc[:, 1].to_numpy()

        same_label = np.flatnonzero(all_labels == label)
        positive_list = same_label[same_label != idx]
        if len(positive_list) > 0:
            positive_item = random.choice(positive_list)
        else:
            # The anchor is the only sample of its class: reuse it as its own
            # positive instead of failing inside random.choice.
            positive_item = idx
        positive_img = self.img_lbls.iloc[positive_item, 0]
        pos_image = self._load_image(positive_img)

        negative_list = np.flatnonzero(all_labels != label)
        if len(negative_list) == 0:
            raise IndexError(
                "cannot draw a negative sample: every row has label {!r}".format(label)
            )
        negative_item = random.choice(negative_list)
        negative_img = self.img_lbls.iloc[negative_item, 0]
        neg_image = self._load_image(negative_img)

        return image, pos_image, neg_image, label, img_file, positive_img, negative_img

In [6]:
# Plain (non-triplet) datasets: each item is (image, label, img_file).
train_data = AdienceDataset("../train.csv", "../cropped_Adience/", train=False)
val_data = AdienceDataset("../val.csv", "../cropped_Adience/", train=False)

In [20]:
# Rebind train_data / val_data to triplet mode: each item becomes
# (image, pos_image, neg_image, label, img_file, positive_img, negative_img).
# NOTE(review): the execution count of this cell (In [20]) is higher than that
# of the embedding-extraction cell (In [19]) even though this cell appears
# earlier in the notebook, so it was evidently executed after that cell.
train_data = AdienceDataset("../train.csv", "../cropped_Adience/", train=True)
val_data = AdienceDataset("../val.csv", "../cropped_Adience/", train=True)

In [7]:
class TripletLoss(nn.Module):
    """Hinge-style triplet loss on squared L2 distances.

    Args:
        margin: amount by which the anchor-negative distance should exceed
            the anchor-positive distance before a triplet stops contributing.
    """

    def __init__(self, margin=1.0):
        super().__init__()
        self.margin = margin

    def calc_euclidean(self, x1, x2):
        """Return the row-wise sum of squared differences (no square root)."""
        return torch.sum(torch.pow(x1 - x2, 2), dim=1)

    def forward(self, anchor, positive, negative):
        """Return the batch mean of ``relu(d(a, p) - d(a, n) + margin)``."""
        gap = (
            self.calc_euclidean(anchor, positive)
            - self.calc_euclidean(anchor, negative)
            + self.margin
        )
        return torch.mean(F.relu(gap))

In [8]:
class CombinedLoss(nn.Module):
    """Cross-entropy classification loss plus a weighted triplet loss.

    Args:
        beta: multiplier applied to the triplet term before it is added to
            the cross-entropy term.
    """

    def __init__(self, beta=1.0):
        super().__init__()
        self.beta = beta
        self.triplet = TripletLoss(margin=1.0)
        self.classification = nn.CrossEntropyLoss()

    def forward(self, anchor, positive, negative, classification_out, labels):
        """Return ``beta * triplet(anchor, positive, negative) + CE(classification_out, labels)``."""
        metric_term = self.beta * self.triplet(anchor, positive, negative)
        class_term = self.classification(classification_out, labels)
        return metric_term + class_term

In [9]:
class CosFace(nn.Module):
    r"""Implement of CosFace (https://arxiv.org/pdf/1801.09414.pdf):
    Args:
        in_features: size of each input sample
        out_features: size of each output sample
        device_id: list of CUDA devices across which the class weights are
                       split (model parallel). If device_id is None the logits
                       are computed on whatever device the input and the
                       weights already live on.
        s: norm of input feature
        m: margin
        cos(theta)-m
    """

    def __init__(self, in_features, out_features, device_id, s=64.0, m=0.35):
        super(CosFace, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.device_id = device_id
        self.s = s
        self.m = m
        print("self.device_id", self.device_id)
        # torch.empty allocates uninitialised storage; Xavier init fills it.
        self.weight = nn.Parameter(torch.empty(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)

    def forward(self, input, label):
        """Return the scaled margin logits ``s * (cos(theta) - m * one_hot(label))``.

        Args:
            input: feature batch of shape ``(batch, in_features)``.
            label: integer class index per sample, ``batch`` elements.

        Returns:
            Tensor of shape ``(batch, out_features)``; with ``device_id`` set
            it lives on ``device_id[0]``.
        """
        # --------------------------- cos(theta) ---------------------------
        if self.device_id is None:
            cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        else:
            primary = self.device_id[0]
            # Each device scores its own slice of the classes; the partial
            # results are gathered on the first device.
            sub_weights = torch.chunk(self.weight, len(self.device_id), dim=0)
            partials = []
            for dev, shard in zip(self.device_id, sub_weights):
                part = F.linear(F.normalize(input.cuda(dev)), F.normalize(shard.cuda(dev)))
                partials.append(part.cuda(primary))
            cosine = torch.cat(partials, dim=1)
        phi = cosine - self.m

        # --------------------------- convert label to one-hot ---------------------------
        # zeros_like keeps one_hot on the same device and dtype as the logits,
        # so this also works when device_id is None and the inputs are on GPU.
        one_hot = torch.zeros_like(cosine)
        one_hot.scatter_(1, label.view(-1, 1).long().to(cosine.device), 1)

        # The margin is applied to the target class only.
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output *= self.s

        return output

    def __repr__(self):
        return self.__class__.__name__ + '(' \
               + 'in_features = ' + str(self.in_features) \
               + ', out_features = ' + str(self.out_features) \
               + ', s = ' + str(self.s) \
               + ', m = ' + str(self.m) + ')'

In [10]:
class Residual(nn.Module):
    """Wrap ``fn`` with a skip connection: ``forward(x) = fn(x) + x``."""

    def __init__(self, fn):
        super().__init__()
        self.fn = fn

    def forward(self, x, **kwargs):
        """Apply the wrapped callable (forwarding ``kwargs``) and add the input back."""
        branch = self.fn(x, **kwargs)
        return branch + x

In [11]:
class PreNorm(nn.Module):
    """Apply ``LayerNorm(dim)`` to the input before calling ``fn``."""

    def __init__(self, dim, fn):
        super().__init__()
        self.norm = nn.LayerNorm(dim)
        self.fn = fn

    def forward(self, x, **kwargs):
        """Normalise ``x`` over its last dimension, then call ``fn`` with ``kwargs``."""
        normed = self.norm(x)
        return self.fn(normed, **kwargs)

In [12]:
class FeedForward(nn.Module):
    """Two-layer MLP: Linear -> GELU -> Dropout -> Linear -> Dropout.

    Args:
        dim: input and output feature size.
        hidden_dim: width of the hidden layer.
        dropout: dropout probability used after the activation and after the
            output projection.
    """

    def __init__(self, dim, hidden_dim, dropout = 0.):
        super().__init__()
        # The position of each layer inside the Sequential determines its
        # state-dict key (net.0, net.3), so the order must stay as is.
        stages = [
            nn.Linear(dim, hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, dim),
            nn.Dropout(dropout),
        ]
        self.net = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through the MLP; the last dimension stays ``dim``."""
        return self.net(x)

In [13]:
class Attention(nn.Module):
    """Multi-head self-attention with an optional per-token validity mask.

    Args:
        dim: feature size of the input and output tokens.
        heads: number of attention heads.
        dim_head: feature size of each head.
        dropout: dropout probability applied after the output projection.
    """

    def __init__(self, dim, heads = 8, dim_head = 64, dropout = 0.):
        super().__init__()
        inner_dim = dim_head *  heads
        self.heads = heads
        # NOTE(review): the scores are scaled by dim ** -0.5 rather than
        # dim_head ** -0.5. Left unchanged because weights trained with this
        # scale would behave differently under another one.
        self.scale = dim ** -0.5

        self.to_qkv = nn.Linear(dim, inner_dim * 3, bias = False)
        self.to_out = nn.Sequential(
            nn.Linear(inner_dim, dim),
            nn.Dropout(dropout)
        )

    def forward(self, x, mask = None):
        """Attend over the token axis of ``x``.

        Args:
            x: tensor of shape ``(batch, tokens, dim)``.
            mask: optional boolean tensor with one entry per token *excluding*
                the first one (it is flattened per sample and a leading True
                is prepended). A query/key pair is kept only when both tokens
                are marked True.

        Returns:
            Tensor of shape ``(batch, tokens, dim)``.
        """
        b, n, _ = x.shape
        h = self.heads
        qkv = self.to_qkv(x).chunk(3, dim = -1)

        # (b, n, h*d) -> (b, h, n, d); same layout as 'b n (h d) -> b h n d'.
        q, k, v = (t.reshape(b, n, h, -1).permute(0, 2, 1, 3) for t in qkv)
        dots = torch.einsum('bhid,bhjd->bhij', q, k) * self.scale
        mask_value = -torch.finfo(dots.dtype).max

        if mask is not None:
            mask = mask.flatten(1).bool()
            # Prepend an always-valid entry for the leading token.
            mask = torch.cat((mask.new_ones((mask.shape[0], 1)), mask), dim=1)
            assert mask.shape[-1] == dots.shape[-1], 'mask has incorrect dimensions'
            # Shape (b, 1, n, n): the singleton axis broadcasts over heads.
            # Without it the batch axis would be aligned with the head axis.
            pair_mask = mask[:, None, :, None] & mask[:, None, None, :]
            dots.masked_fill_(~pair_mask, mask_value)
            del mask, pair_mask

        attn = dots.softmax(dim=-1)

        out = torch.einsum('bhij,bhjd->bhid', attn, v)
        # (b, h, n, d) -> (b, n, h*d); same layout as 'b h n d -> b n (h d)'.
        out = out.permute(0, 2, 1, 3).reshape(b, n, -1)
        out =  self.to_out(out)

        return out

In [14]:
class Transformer(nn.Module):
    """Stack of ``depth`` pre-norm encoder layers.

    Each layer is a residual attention block followed by a residual
    feed-forward block.

    Args:
        dim: token feature size.
        depth: number of layers.
        heads: attention heads per layer.
        dim_head: feature size per attention head.
        mlp_dim: hidden width of the feed-forward block.
        dropout: dropout probability used in both blocks.
    """

    def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout):
        super().__init__()
        # Nested ModuleList layout (layers.<i>.0 = attention, layers.<i>.1 =
        # feed-forward) defines the state-dict keys.
        self.layers = nn.ModuleList([
            nn.ModuleList([
                Residual(PreNorm(dim, Attention(dim, heads = heads, dim_head = dim_head, dropout = dropout))),
                Residual(PreNorm(dim, FeedForward(dim, mlp_dim, dropout = dropout))),
            ])
            for _ in range(depth)
        ])

    def forward(self, x, mask = None):
        """Pass ``x`` through every layer; ``mask`` is given to each attention block."""
        for layer in self.layers:
            attention_block, feedforward_block = layer
            x = attention_block(x, mask = mask)
            x = feedforward_block(x)
        return x

In [15]:
class ViTs_face(nn.Module):
    """Vision-transformer face backbone with unfold-based patch extraction.

    Patches are cut with ``nn.Unfold`` using kernel ``ac_patch_size``, stride
    ``patch_size`` and padding ``pad``, linearly embedded, prefixed with a
    learnable class token and passed through a ``Transformer``. ``forward``
    returns the layer-normalised class token concatenated with the
    layer-normalised mean of the patch tokens (width ``2 * dim``).

    All constructor arguments are keyword-only.

    Args:
        loss_type: one of 'None', 'Softmax', 'CosFace', 'ArcFace', 'SFace';
            selects the head stored in ``self.loss``. Any other string leaves
            ``self.loss`` unset.
        GPU_ID: passed to the loss head as ``device_id``.
        num_class: number of output classes of the loss head.
        image_size: input image side length; must be divisible by ``patch_size``.
        patch_size: stride between neighbouring patches.
        ac_patch_size: actual side length of each extracted patch.
        pad: padding applied on each side before unfolding.
        dim: token embedding size.
        depth, heads, mlp_dim, dim_head, dropout: forwarded to ``Transformer``.
        pool: validated to be 'cls' or 'mean' and stored, but not read by
            ``forward`` in this class.
        channels: number of image channels.
        emb_dropout: dropout probability applied to the embedded tokens.
    """

    def __init__(self, *, loss_type, GPU_ID, num_class, image_size, patch_size, ac_patch_size,
                         pad, dim, depth, heads, mlp_dim, pool = 'mean', channels = 3, dim_head = 64, dropout = 0., emb_dropout = 0.):
        super().__init__()
        assert image_size % patch_size == 0, 'Image dimensions must be divisible by the patch size.'
        num_patches = (image_size // patch_size) ** 2
        patch_dim = channels * ac_patch_size ** 2
        # The check is strict: exactly MIN_NUM_PATCHES patches is rejected too.
        assert num_patches > MIN_NUM_PATCHES, f'your number of patches ({num_patches}) is way too small for attention to be effective (at least 16). Try decreasing your patch size'
        assert pool in {'cls', 'mean'}, 'pool type must be either cls (cls token) or mean (mean pooling)'

        self.patch_size = patch_size
        # Patches overlap whenever ac_patch_size is larger than patch_size.
        self.soft_split = nn.Unfold(kernel_size=(ac_patch_size, ac_patch_size), stride=(self.patch_size, self.patch_size), padding=(pad, pad))


        # One positional vector per patch plus one for the class token.
        self.pos_embedding = nn.Parameter(torch.randn(1, num_patches + 1, dim))
        self.patch_to_embedding = nn.Linear(patch_dim, dim)
        self.cls_token = nn.Parameter(torch.randn(1, 1, dim))
        self.dropout = nn.Dropout(emb_dropout)

        self.transformer = Transformer(dim, depth, heads, dim_head, mlp_dim, dropout)

        self.pool = pool
        self.to_latent = nn.Identity()

        self.mlp_head = nn.Sequential(
            nn.LayerNorm(dim),
        )
        self.loss_type = loss_type
        self.GPU_ID = GPU_ID
        if self.loss_type == 'None':
            print("no loss for vit_face")
        else:
            # NOTE(review): Softmax, ArcFace and SFaceLoss are not defined in the
            # visible part of this notebook; selecting them would presumably
            # raise NameError unless they are defined elsewhere.
            if self.loss_type == 'Softmax':
                self.loss = Softmax(in_features=dim, out_features=num_class, device_id=self.GPU_ID)
            elif self.loss_type == 'CosFace':
                self.loss = CosFace(in_features=dim, out_features=num_class, device_id=self.GPU_ID)
            elif self.loss_type == 'ArcFace':
                self.loss = ArcFace(in_features=dim, out_features=num_class, device_id=self.GPU_ID)
            elif self.loss_type == 'SFace':
                self.loss = SFaceLoss(in_features=dim, out_features=num_class, device_id=self.GPU_ID)

    def forward(self, img, label= None , mask = None):
        """Embed a batch of images.

        Args:
            img: image batch given to ``nn.Unfold`` (4-D, channels first).
            label: optional class labels; when given, the loss head is applied.
            mask: optional attention mask forwarded to the transformer.

        Returns:
            ``emb`` when ``label`` is None, otherwise ``(loss_head_output, emb)``.
        """
        p = self.patch_size  # not used below
        # Unfold yields (batch, patch_dim, patches); move patches to axis 1.
        x = self.soft_split(img).transpose(1, 2)
        x = self.patch_to_embedding(x)
        b, n, _ = x.shape

        cls_tokens = repeat(self.cls_token, '() n d -> b n d', b = b)
        x = torch.cat((cls_tokens, x), dim=1)
        x += self.pos_embedding[:, :(n + 1)]
        x = self.dropout(x)
        x = self.transformer(x, mask)

        # y: class-token output, z: mean over all patch-token outputs.
        y = x[:, 0]
        z = x[:, 1:].mean(dim = 1)

        y = self.to_latent(y)
        emb_y = self.mlp_head(y)
        z = self.to_latent(z)
        emb_z = self.mlp_head(z)
        # Concatenation doubles the feature width to 2 * dim.
        emb = torch.cat((emb_y, emb_z), dim=1)
        if label is not None:
            # NOTE(review): the loss heads are built with in_features=dim while
            # emb is 2 * dim wide, so this branch looks like it would fail with
            # a shape mismatch - confirm before relying on it.
            x = self.loss(emb, label)
            return x, emb
        else:
            return emb

In [16]:
class ViT_plus(nn.Module):
    """Trainable head applied to cached 1024-d backbone embeddings.

    ``fc1`` maps the embedding to a new 1024-d vector and ``fc2`` maps that
    vector to 2 class logits.
    """

    def __init__(self):
        super().__init__()

        self.fc1 = nn.Linear(1024, 1024)
        self.fc2 = nn.Linear(1024, 2)

    def forward(self, x):
        """Return ``(fc1(x), fc2(fc1(x)))``: the projected embedding and the logits."""
        x_cosface = self.fc1(x)
        return x_cosface, self.fc2(x_cosface)

In [17]:
# Build the backbone with the hyper-parameters of the checkpoint loaded below
# (12x12 patches cut with stride 8 and padding 4 on 112x112 inputs, CosFace head).
model = ViTs_face(
            loss_type='CosFace',
            # The loss head receives this list as device_id.
            GPU_ID=[device],
            # presumably the number of identities the checkpoint was trained on - TODO confirm
            num_class=93431,
            image_size=112,
            patch_size=8,
            ac_patch_size=12,
            pad=4,
            dim=512,
            depth=20,
            heads=8,
            mlp_dim=2048,
            dropout=0.1,
            emb_dropout=0.1
        ).to(device)
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source. map_location places the tensors on `device`.
model.load_state_dict(
    torch.load("../Face-Transformer/results/ViT-P12S8_ms1m_cosface/Backbone_VITs_Epoch_2_Batch_12000_Time_2021-03-17-04-05_checkpoint.pth", map_location=device)
)

self.device_id [device(type='cuda', index=1)]


<All keys matched successfully>

In [18]:
# Freeze the backbone: none of its parameters should receive gradients.
for frozen_param in model.parameters():
    frozen_param.requires_grad_(False)

In [19]:
# Cache one backbone embedding per image file (train and validation splits).
# The cache maps file name -> tensor returned by the backbone for a batch of one.
embeds = {}
model.eval()

with torch.no_grad():
    # Build plain (train=False) datasets here instead of iterating the global
    # train_data / val_data: those names are rebound to triplet mode by another
    # cell, and triplet items cannot be unpacked into three values.
    for annot_file in ("../train.csv", "../val.csv"):
        plain_split = AdienceDataset(annot_file, "../cropped_Adience/", train=False)
        for sample_idx in range(len(plain_split)):
            img, _, file = plain_split[sample_idx]
            img = img.to(device)
            # The backbone expects a batch axis, hence the unsqueeze.
            embeds[file] = model(torch.unsqueeze(img, 0))

In [21]:
# Best validation accuracy seen so far; a trial overwrites the saved model only
# when its final validation accuracy beats this value.
best_accu = 0.9669528007507324


def _run_epoch(model_xtr, criterion, loader, optimizer=None):
    """Run one pass over ``loader`` and return ``(mean loss, mean accuracy)``.

    The pass is a training pass when ``optimizer`` is given and an evaluation
    pass (no gradients, eval mode) otherwise. Both means are taken over the
    per-batch values.

    Args:
        model_xtr: head that maps cached embeddings to ``(embedding, logits)``.
        criterion: callable ``(anchor, pos, neg, logits, labels) -> loss``.
        loader: iterable of triplet batches as produced by the triplet-mode
            dataset (images, label, file names).
        optimizer: optimizer to step after each batch, or None to evaluate.
    """
    is_training = optimizer is not None
    model_xtr.train(is_training)

    batch_loss = []
    batch_accu = []
    with torch.set_grad_enabled(is_training):
        # Only the labels and file names are consumed: the head works on the
        # cached embeddings, so the image tensors are not copied to the device.
        for _img, _pos_img, _neg_img, label, img_file, pos_file, neg_file in loader:
            label = label.to(device)

            x1 = file_to_embed(embeds, img_file)
            x2 = file_to_embed(embeds, pos_file)
            x3 = file_to_embed(embeds, neg_file)

            if is_training:
                optimizer.zero_grad()
            anchor, output = model_xtr(x1)
            pos, _ = model_xtr(x2)
            neg, _ = model_xtr(x3)

            pred = torch.argmax(output, 1)
            accuracy = torch.eq(pred, label).sum() / len(label)

            loss = criterion(anchor, pos, neg, output, label)
            if is_training:
                loss.backward()
                optimizer.step()

            batch_accu.append(accuracy.cpu().detach().numpy())
            batch_loss.append(loss.cpu().detach().numpy())

    return np.mean(batch_loss), np.mean(batch_accu)


def objective(trial):
    """Optuna objective: train a fresh ``ViT_plus`` head and return its final validation accuracy.

    The learning rate, weight decay, AdamW epsilon, batch size and number of
    epochs are sampled from ``trial``. Validation accuracy is reported after
    every epoch so that the study can prune the trial. When the final
    validation accuracy exceeds the module-level ``best_accu``, the head's
    weights are saved and ``best_accu`` is updated.

    Relies on the module-level ``train_data`` / ``val_data`` (triplet mode),
    ``embeds`` and ``device``.

    Raises:
        optuna.exceptions.TrialPruned: when the study decides to prune.
    """
    global best_accu

    model_xtr = ViT_plus().to(device)

    lr = trial.suggest_float("learning_rate", 1e-5, 1e-1, log=True)
    wd = trial.suggest_float('weight_decay', 1e-4, 1e-2, log=True)
    eps = trial.suggest_float("epsilon", 1e-9, 1e-7, log=True)
    optimizer = opt.AdamW(model_xtr.parameters(), lr=lr, eps=eps, weight_decay=wd)

    criterion = CombinedLoss().to(device)

    batch_size = trial.suggest_int('batch_size', 10, 100)
    num_epochs = trial.suggest_int('epochs', 10, 100)

    print("Learning rate: "+ str(lr))
    print("Weight decay: "+ str(wd))
    print("Epsilon: "+ str(eps))
    print("Batch size: "+ str(batch_size))
    print("Number of epochs: "+ str(num_epochs))

    # The loaders do not change between epochs, so build them once.
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=4)
    val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False, num_workers=4)

    for epoch in tqdm(range(num_epochs), desc="Epochs"):
        # training loop
        train_loss, train_accu = _run_epoch(
            model_xtr, criterion, tqdm(train_loader, desc="Training", leave=False), optimizer
        )
        print("Epoch: {}/{} - Loss: {:.4f} - Accuracy: {:.4f}".format(epoch+1, num_epochs, train_loss, train_accu))

        # validation loop
        val_loss, val_accu = _run_epoch(model_xtr, criterion, tqdm(val_loader))
        print("Val Loss: {:.4f} - Val Accuracy: {:.4f}".format(val_loss, val_accu))

        trial.report(val_accu, epoch)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    # Only the accuracy of the last epoch decides whether the model is saved.
    if val_accu > best_accu:
        best_accu = val_accu
        print("Saving best model...")
        torch.save(model_xtr.state_dict(), "../vit_12-8_triplet_mean.pt")

    return val_accu

In [22]:
# Create the Optuna study (maximising the objective's return value) backed by
# a local SQLite file. With load_if_exists=True an existing study of the same
# name is reused instead of raising, so earlier trials are kept.
study = optuna.create_study(direction='maximize',
                            study_name='triplet-12-8-mean-vit-study',
                            storage='sqlite:///study.db',
                            load_if_exists=True)
# Run 10 trials of the objective in this session.
study.optimize(objective, n_trials=10)

pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

# Display the study statistics
# NOTE(review): study.trials is not filtered by state, so the "finished" count
# presumably includes every trial stored in the study - confirm.
print("\nStudy statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

[32m[I 2023-12-07 22:23:31,706][0m Using an existing study with name 'triplet-12-8-mean-vit-study' instead of creating a new one.[0m


Learning rate: 0.00017348444622346392
Weight decay: 0.00011451862597352213
Epsilon: 8.227661069131743e-08
Batch size: 21
Number of epochs: 95


Epochs:   0%|          | 0/95 [00:00<?, ?it/s]

Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 1/95 - Loss: 6.6226 - Accuracy: 0.9165


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 3.7800 - Val Accuracy: 0.9586


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 2/95 - Loss: 3.7825 - Accuracy: 0.9576


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.4226 - Val Accuracy: 0.9592


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 3/95 - Loss: 3.7160 - Accuracy: 0.9610


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.0997 - Val Accuracy: 0.9609


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 4/95 - Loss: 3.4070 - Accuracy: 0.9595


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 3.2782 - Val Accuracy: 0.9637


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 5/95 - Loss: 3.3437 - Accuracy: 0.9610


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 3.6058 - Val Accuracy: 0.9654


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 6/95 - Loss: 3.1377 - Accuracy: 0.9641


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 3.3782 - Val Accuracy: 0.9632


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 7/95 - Loss: 3.2161 - Accuracy: 0.9647


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 3.6402 - Val Accuracy: 0.9615


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 8/95 - Loss: 3.3971 - Accuracy: 0.9656


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 3.2795 - Val Accuracy: 0.9643


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 9/95 - Loss: 3.2826 - Accuracy: 0.9657


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 3.5337 - Val Accuracy: 0.9637


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 10/95 - Loss: 3.0371 - Accuracy: 0.9685


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.5602 - Val Accuracy: 0.9637


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 11/95 - Loss: 3.1897 - Accuracy: 0.9675


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 3.5428 - Val Accuracy: 0.9637


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 12/95 - Loss: 3.1941 - Accuracy: 0.9675


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.4563 - Val Accuracy: 0.9603


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 13/95 - Loss: 3.2397 - Accuracy: 0.9674


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 3.8395 - Val Accuracy: 0.9643


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 14/95 - Loss: 3.2082 - Accuracy: 0.9684


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.0473 - Val Accuracy: 0.9654


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 15/95 - Loss: 3.2923 - Accuracy: 0.9677


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 3.2795 - Val Accuracy: 0.9620


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 16/95 - Loss: 3.2130 - Accuracy: 0.9699


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 3.7180 - Val Accuracy: 0.9626


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 17/95 - Loss: 3.1642 - Accuracy: 0.9688


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.1614 - Val Accuracy: 0.9671


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 18/95 - Loss: 3.1385 - Accuracy: 0.9699


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 3.9089 - Val Accuracy: 0.9660


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 19/95 - Loss: 3.1730 - Accuracy: 0.9701


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.1807 - Val Accuracy: 0.9643


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 20/95 - Loss: 3.3770 - Accuracy: 0.9698


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.0597 - Val Accuracy: 0.9575


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 21/95 - Loss: 3.2230 - Accuracy: 0.9697


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.0863 - Val Accuracy: 0.9563


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 22/95 - Loss: 3.1231 - Accuracy: 0.9698


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.6625 - Val Accuracy: 0.9603


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 23/95 - Loss: 3.1676 - Accuracy: 0.9700


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 3.9422 - Val Accuracy: 0.9569


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 24/95 - Loss: 3.2240 - Accuracy: 0.9707


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.5148 - Val Accuracy: 0.9609


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 25/95 - Loss: 3.1574 - Accuracy: 0.9712


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 3.8277 - Val Accuracy: 0.9637


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 26/95 - Loss: 3.3120 - Accuracy: 0.9712


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 3.5808 - Val Accuracy: 0.9705


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 27/95 - Loss: 3.1401 - Accuracy: 0.9708


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.5249 - Val Accuracy: 0.9683


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 28/95 - Loss: 3.0797 - Accuracy: 0.9710


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 3.6790 - Val Accuracy: 0.9694


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 29/95 - Loss: 3.3063 - Accuracy: 0.9720


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 3.9380 - Val Accuracy: 0.9654


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 30/95 - Loss: 3.1248 - Accuracy: 0.9715


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.1106 - Val Accuracy: 0.9626


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 31/95 - Loss: 3.2718 - Accuracy: 0.9724


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.5034 - Val Accuracy: 0.9643


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 32/95 - Loss: 3.3255 - Accuracy: 0.9724


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.4358 - Val Accuracy: 0.9649


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 33/95 - Loss: 3.1081 - Accuracy: 0.9726


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.6885 - Val Accuracy: 0.9666


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 34/95 - Loss: 3.0972 - Accuracy: 0.9707


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 5.0499 - Val Accuracy: 0.9660


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 35/95 - Loss: 3.2871 - Accuracy: 0.9732


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 3.9259 - Val Accuracy: 0.9688


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 36/95 - Loss: 3.1243 - Accuracy: 0.9722


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.4757 - Val Accuracy: 0.9666


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 37/95 - Loss: 3.1259 - Accuracy: 0.9725


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.6334 - Val Accuracy: 0.9683


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 38/95 - Loss: 3.2551 - Accuracy: 0.9730


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.1943 - Val Accuracy: 0.9643


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 39/95 - Loss: 2.9055 - Accuracy: 0.9722


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.6687 - Val Accuracy: 0.9654


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 40/95 - Loss: 3.3813 - Accuracy: 0.9726


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.2887 - Val Accuracy: 0.9677


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 41/95 - Loss: 3.2393 - Accuracy: 0.9700


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 3.7402 - Val Accuracy: 0.9649


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 42/95 - Loss: 3.2178 - Accuracy: 0.9718


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.1980 - Val Accuracy: 0.9694


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 43/95 - Loss: 3.1710 - Accuracy: 0.9733


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.6858 - Val Accuracy: 0.9671


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 44/95 - Loss: 3.0919 - Accuracy: 0.9735


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 3.7372 - Val Accuracy: 0.9677


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 45/95 - Loss: 3.2797 - Accuracy: 0.9718


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.8619 - Val Accuracy: 0.9626


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 46/95 - Loss: 3.1027 - Accuracy: 0.9705


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.6272 - Val Accuracy: 0.9603


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 47/95 - Loss: 3.1547 - Accuracy: 0.9733


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.7039 - Val Accuracy: 0.9632


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 48/95 - Loss: 3.2251 - Accuracy: 0.9731


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.5894 - Val Accuracy: 0.9637


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 49/95 - Loss: 3.2763 - Accuracy: 0.9743


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.3488 - Val Accuracy: 0.9598


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 50/95 - Loss: 3.1175 - Accuracy: 0.9731


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 5.0392 - Val Accuracy: 0.9615


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 51/95 - Loss: 3.2843 - Accuracy: 0.9745


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.4053 - Val Accuracy: 0.9654


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 52/95 - Loss: 2.9399 - Accuracy: 0.9738


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.5510 - Val Accuracy: 0.9649


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 53/95 - Loss: 3.1607 - Accuracy: 0.9737


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.2030 - Val Accuracy: 0.9660


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 54/95 - Loss: 3.1733 - Accuracy: 0.9733


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.8199 - Val Accuracy: 0.9666


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 55/95 - Loss: 3.1800 - Accuracy: 0.9731


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.3109 - Val Accuracy: 0.9603


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 56/95 - Loss: 3.2138 - Accuracy: 0.9741


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.3889 - Val Accuracy: 0.9603


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 57/95 - Loss: 3.1711 - Accuracy: 0.9725


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.4027 - Val Accuracy: 0.9660


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 58/95 - Loss: 3.2383 - Accuracy: 0.9738


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 3.8410 - Val Accuracy: 0.9671


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 59/95 - Loss: 3.2132 - Accuracy: 0.9740


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.0633 - Val Accuracy: 0.9643


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 60/95 - Loss: 3.0097 - Accuracy: 0.9738


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.1692 - Val Accuracy: 0.9632


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 61/95 - Loss: 3.1150 - Accuracy: 0.9731


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 3.7985 - Val Accuracy: 0.9620


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 62/95 - Loss: 3.2878 - Accuracy: 0.9735


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.4954 - Val Accuracy: 0.9671


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 63/95 - Loss: 3.0401 - Accuracy: 0.9732


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.9399 - Val Accuracy: 0.9643


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 64/95 - Loss: 3.2335 - Accuracy: 0.9743


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.4890 - Val Accuracy: 0.9615


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 65/95 - Loss: 3.1715 - Accuracy: 0.9739


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.8348 - Val Accuracy: 0.9620


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 66/95 - Loss: 2.8720 - Accuracy: 0.9729


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.2243 - Val Accuracy: 0.9620


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 67/95 - Loss: 3.1771 - Accuracy: 0.9743


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 3.8705 - Val Accuracy: 0.9649


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 68/95 - Loss: 3.1223 - Accuracy: 0.9745


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 3.8589 - Val Accuracy: 0.9643


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 69/95 - Loss: 3.3579 - Accuracy: 0.9743


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.4557 - Val Accuracy: 0.9683


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 70/95 - Loss: 3.2442 - Accuracy: 0.9738


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.7613 - Val Accuracy: 0.9660


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 71/95 - Loss: 3.0960 - Accuracy: 0.9733


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 5.3723 - Val Accuracy: 0.9620


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 72/95 - Loss: 3.1661 - Accuracy: 0.9755


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.3315 - Val Accuracy: 0.9626


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 73/95 - Loss: 3.2557 - Accuracy: 0.9748


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 3.9955 - Val Accuracy: 0.9677


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 74/95 - Loss: 3.1304 - Accuracy: 0.9742


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 3.5377 - Val Accuracy: 0.9688


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 75/95 - Loss: 3.1314 - Accuracy: 0.9755


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.8831 - Val Accuracy: 0.9592


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 76/95 - Loss: 3.1693 - Accuracy: 0.9730


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.4910 - Val Accuracy: 0.9666


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 77/95 - Loss: 3.1031 - Accuracy: 0.9748


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.0652 - Val Accuracy: 0.9632


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 78/95 - Loss: 3.1655 - Accuracy: 0.9758


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 5.0094 - Val Accuracy: 0.9694


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 79/95 - Loss: 2.9573 - Accuracy: 0.9768


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.4951 - Val Accuracy: 0.9660


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 80/95 - Loss: 3.1592 - Accuracy: 0.9743


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.6016 - Val Accuracy: 0.9632


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 81/95 - Loss: 3.1759 - Accuracy: 0.9746


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.2532 - Val Accuracy: 0.9643


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 82/95 - Loss: 3.2114 - Accuracy: 0.9736


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.1372 - Val Accuracy: 0.9626


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 83/95 - Loss: 3.1013 - Accuracy: 0.9750


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 3.9835 - Val Accuracy: 0.9609


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 84/95 - Loss: 3.2921 - Accuracy: 0.9763


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.0657 - Val Accuracy: 0.9660


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 85/95 - Loss: 3.2036 - Accuracy: 0.9757


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 3.7889 - Val Accuracy: 0.9683


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 86/95 - Loss: 2.9188 - Accuracy: 0.9748


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.5180 - Val Accuracy: 0.9660


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 87/95 - Loss: 3.1439 - Accuracy: 0.9748


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 3.8802 - Val Accuracy: 0.9649


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 88/95 - Loss: 3.1132 - Accuracy: 0.9737


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.3460 - Val Accuracy: 0.9609


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 89/95 - Loss: 3.2516 - Accuracy: 0.9747


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.5130 - Val Accuracy: 0.9671


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 90/95 - Loss: 3.0043 - Accuracy: 0.9744


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.3462 - Val Accuracy: 0.9683


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 91/95 - Loss: 3.0309 - Accuracy: 0.9739


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.3825 - Val Accuracy: 0.9632


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 92/95 - Loss: 3.3352 - Accuracy: 0.9740


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 5.2191 - Val Accuracy: 0.9694


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 93/95 - Loss: 2.9548 - Accuracy: 0.9755


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.1664 - Val Accuracy: 0.9626


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 94/95 - Loss: 3.1434 - Accuracy: 0.9751


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 4.2654 - Val Accuracy: 0.9649


Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 95/95 - Loss: 3.2149 - Accuracy: 0.9764


  0%|          | 0/84 [00:00<?, ?it/s]

Val Loss: 5.6270 - Val Accuracy: 0.9683
Saving best model...


[32m[I 2023-12-07 23:37:32,866][0m Trial 11 finished with value: 0.9682537913322449 and parameters: {'learning_rate': 0.00017348444622346392, 'weight_decay': 0.00011451862597352213, 'epsilon': 8.227661069131743e-08, 'batch_size': 21, 'epochs': 95}. Best is trial 11 with value: 0.9682537913322449.[0m


Learning rate: 9.105630444960548e-05
Weight decay: 0.00010020079013284928
Epsilon: 9.386097456300994e-08
Batch size: 12
Number of epochs: 100


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 1/100 - Loss: 7.0681 - Accuracy: 0.9048


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 4.1742 - Val Accuracy: 0.9549


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 2/100 - Loss: 3.6574 - Accuracy: 0.9545


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 4.1138 - Val Accuracy: 0.9566


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 3/100 - Loss: 3.2178 - Accuracy: 0.9599


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 3.4674 - Val Accuracy: 0.9612


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 4/100 - Loss: 2.9774 - Accuracy: 0.9610


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 3.0162 - Val Accuracy: 0.9635


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 5/100 - Loss: 2.9526 - Accuracy: 0.9638


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 3.0453 - Val Accuracy: 0.9658


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 6/100 - Loss: 2.6589 - Accuracy: 0.9659


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 3.1749 - Val Accuracy: 0.9640


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 7/100 - Loss: 2.7631 - Accuracy: 0.9651


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 3.0856 - Val Accuracy: 0.9612


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 8/100 - Loss: 2.5566 - Accuracy: 0.9667


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.8456 - Val Accuracy: 0.9652


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 9/100 - Loss: 2.5814 - Accuracy: 0.9682


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 3.3753 - Val Accuracy: 0.9658


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 10/100 - Loss: 2.4794 - Accuracy: 0.9681


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.8172 - Val Accuracy: 0.9697


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 11/100 - Loss: 2.5370 - Accuracy: 0.9682


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.6898 - Val Accuracy: 0.9646


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 12/100 - Loss: 2.3853 - Accuracy: 0.9690


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.9177 - Val Accuracy: 0.9635


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 13/100 - Loss: 2.2866 - Accuracy: 0.9702


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.7013 - Val Accuracy: 0.9635


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 14/100 - Loss: 2.3608 - Accuracy: 0.9697


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.4384 - Val Accuracy: 0.9629


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 15/100 - Loss: 2.4209 - Accuracy: 0.9712


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 3.1886 - Val Accuracy: 0.9561


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 16/100 - Loss: 2.2248 - Accuracy: 0.9709


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 3.3464 - Val Accuracy: 0.9629


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 17/100 - Loss: 2.2082 - Accuracy: 0.9708


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 3.2810 - Val Accuracy: 0.9618


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 18/100 - Loss: 2.2291 - Accuracy: 0.9700


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.9734 - Val Accuracy: 0.9618


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 19/100 - Loss: 2.2549 - Accuracy: 0.9705


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.3085 - Val Accuracy: 0.9652


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 20/100 - Loss: 2.2090 - Accuracy: 0.9708


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 3.0270 - Val Accuracy: 0.9640


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 21/100 - Loss: 2.1686 - Accuracy: 0.9709


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.7058 - Val Accuracy: 0.9646


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 22/100 - Loss: 2.0805 - Accuracy: 0.9712


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.5852 - Val Accuracy: 0.9669


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 23/100 - Loss: 2.2191 - Accuracy: 0.9718


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.7049 - Val Accuracy: 0.9663


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 24/100 - Loss: 2.0767 - Accuracy: 0.9704


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.6678 - Val Accuracy: 0.9669


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 25/100 - Loss: 2.2287 - Accuracy: 0.9720


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.9768 - Val Accuracy: 0.9635


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 26/100 - Loss: 2.2187 - Accuracy: 0.9718


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.9018 - Val Accuracy: 0.9652


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 27/100 - Loss: 2.2421 - Accuracy: 0.9720


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.8194 - Val Accuracy: 0.9663


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 28/100 - Loss: 2.1749 - Accuracy: 0.9717


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 3.0391 - Val Accuracy: 0.9646


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 29/100 - Loss: 2.1074 - Accuracy: 0.9729


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.7999 - Val Accuracy: 0.9663


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 30/100 - Loss: 2.0309 - Accuracy: 0.9721


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.8622 - Val Accuracy: 0.9692


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 31/100 - Loss: 2.0731 - Accuracy: 0.9724


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.8075 - Val Accuracy: 0.9646


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 32/100 - Loss: 1.9324 - Accuracy: 0.9720


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.7513 - Val Accuracy: 0.9652


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 33/100 - Loss: 2.0110 - Accuracy: 0.9732


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 3.1000 - Val Accuracy: 0.9640


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 34/100 - Loss: 2.0227 - Accuracy: 0.9738


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.9775 - Val Accuracy: 0.9692


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 35/100 - Loss: 2.1308 - Accuracy: 0.9720


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.5551 - Val Accuracy: 0.9692


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 36/100 - Loss: 1.9997 - Accuracy: 0.9721


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 3.1090 - Val Accuracy: 0.9623


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 37/100 - Loss: 2.0561 - Accuracy: 0.9737


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.9531 - Val Accuracy: 0.9646


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 38/100 - Loss: 2.0097 - Accuracy: 0.9738


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 3.0578 - Val Accuracy: 0.9658


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 39/100 - Loss: 2.0369 - Accuracy: 0.9726


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.7761 - Val Accuracy: 0.9623


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 40/100 - Loss: 1.9956 - Accuracy: 0.9731


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.6560 - Val Accuracy: 0.9606


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 41/100 - Loss: 2.0016 - Accuracy: 0.9725


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.8568 - Val Accuracy: 0.9658


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 42/100 - Loss: 1.9424 - Accuracy: 0.9722


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 3.0375 - Val Accuracy: 0.9623


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 43/100 - Loss: 2.1377 - Accuracy: 0.9727


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.8452 - Val Accuracy: 0.9663


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 44/100 - Loss: 1.9840 - Accuracy: 0.9730


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.5828 - Val Accuracy: 0.9680


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 45/100 - Loss: 2.1008 - Accuracy: 0.9717


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.5788 - Val Accuracy: 0.9618


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 46/100 - Loss: 1.9547 - Accuracy: 0.9733


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.9831 - Val Accuracy: 0.9646


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 47/100 - Loss: 2.0634 - Accuracy: 0.9738


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.3484 - Val Accuracy: 0.9658


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 48/100 - Loss: 1.9721 - Accuracy: 0.9733


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.6956 - Val Accuracy: 0.9669


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 49/100 - Loss: 2.0589 - Accuracy: 0.9722


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.4564 - Val Accuracy: 0.9652


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 50/100 - Loss: 2.1001 - Accuracy: 0.9740


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.5930 - Val Accuracy: 0.9680


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 51/100 - Loss: 1.9482 - Accuracy: 0.9737


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.1187 - Val Accuracy: 0.9646


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 52/100 - Loss: 1.9924 - Accuracy: 0.9737


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.7975 - Val Accuracy: 0.9635


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 53/100 - Loss: 1.8326 - Accuracy: 0.9732


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.7978 - Val Accuracy: 0.9612


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 54/100 - Loss: 1.9396 - Accuracy: 0.9740


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.9791 - Val Accuracy: 0.9658


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 55/100 - Loss: 2.0220 - Accuracy: 0.9729


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.9982 - Val Accuracy: 0.9663


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 56/100 - Loss: 1.9346 - Accuracy: 0.9745


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 3.0163 - Val Accuracy: 0.9675


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 57/100 - Loss: 1.9401 - Accuracy: 0.9737


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.8824 - Val Accuracy: 0.9652


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 58/100 - Loss: 1.9634 - Accuracy: 0.9732


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.5754 - Val Accuracy: 0.9658


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 59/100 - Loss: 1.9652 - Accuracy: 0.9746


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.8668 - Val Accuracy: 0.9669


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 60/100 - Loss: 1.8465 - Accuracy: 0.9742


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.4559 - Val Accuracy: 0.9669


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 61/100 - Loss: 1.9469 - Accuracy: 0.9741


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.5435 - Val Accuracy: 0.9612


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 62/100 - Loss: 1.9035 - Accuracy: 0.9734


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.7535 - Val Accuracy: 0.9583


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 63/100 - Loss: 1.7937 - Accuracy: 0.9736


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.4263 - Val Accuracy: 0.9629


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 64/100 - Loss: 1.9479 - Accuracy: 0.9743


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 3.3243 - Val Accuracy: 0.9629


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 65/100 - Loss: 1.9277 - Accuracy: 0.9739


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.6954 - Val Accuracy: 0.9618


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 66/100 - Loss: 1.9304 - Accuracy: 0.9743


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 3.2278 - Val Accuracy: 0.9646


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 67/100 - Loss: 1.8178 - Accuracy: 0.9746


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.8529 - Val Accuracy: 0.9635


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 68/100 - Loss: 1.9001 - Accuracy: 0.9742


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.8024 - Val Accuracy: 0.9612


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 69/100 - Loss: 1.8605 - Accuracy: 0.9742


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.6464 - Val Accuracy: 0.9646


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 70/100 - Loss: 1.9760 - Accuracy: 0.9727


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.7005 - Val Accuracy: 0.9612


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 71/100 - Loss: 1.9217 - Accuracy: 0.9740


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.7861 - Val Accuracy: 0.9640


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 72/100 - Loss: 2.0071 - Accuracy: 0.9745


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.8442 - Val Accuracy: 0.9658


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 73/100 - Loss: 1.9610 - Accuracy: 0.9755


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 3.0267 - Val Accuracy: 0.9612


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 74/100 - Loss: 1.8965 - Accuracy: 0.9741


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.9489 - Val Accuracy: 0.9606


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 75/100 - Loss: 1.9737 - Accuracy: 0.9757


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.7651 - Val Accuracy: 0.9640


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 76/100 - Loss: 2.0083 - Accuracy: 0.9744


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 3.2627 - Val Accuracy: 0.9612


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 77/100 - Loss: 1.9394 - Accuracy: 0.9742


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.5474 - Val Accuracy: 0.9658


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 78/100 - Loss: 1.8533 - Accuracy: 0.9754


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 3.0707 - Val Accuracy: 0.9640


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 79/100 - Loss: 2.0242 - Accuracy: 0.9743


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.5994 - Val Accuracy: 0.9640


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 80/100 - Loss: 1.9971 - Accuracy: 0.9757


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.7656 - Val Accuracy: 0.9652


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 81/100 - Loss: 1.7754 - Accuracy: 0.9755


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.7237 - Val Accuracy: 0.9640


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 82/100 - Loss: 1.8479 - Accuracy: 0.9764


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 3.1685 - Val Accuracy: 0.9640


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 83/100 - Loss: 1.8480 - Accuracy: 0.9753


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 3.1354 - Val Accuracy: 0.9646


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 84/100 - Loss: 1.9843 - Accuracy: 0.9750


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 3.1567 - Val Accuracy: 0.9600


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 85/100 - Loss: 1.8679 - Accuracy: 0.9759


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.8825 - Val Accuracy: 0.9589


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 86/100 - Loss: 1.9243 - Accuracy: 0.9744


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 3.2096 - Val Accuracy: 0.9612


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 87/100 - Loss: 1.8306 - Accuracy: 0.9763


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 3.1946 - Val Accuracy: 0.9629


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 88/100 - Loss: 1.8252 - Accuracy: 0.9755


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.8314 - Val Accuracy: 0.9595


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 89/100 - Loss: 1.8962 - Accuracy: 0.9758


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.8844 - Val Accuracy: 0.9686


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 90/100 - Loss: 1.8530 - Accuracy: 0.9754


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.7590 - Val Accuracy: 0.9623


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 91/100 - Loss: 1.9796 - Accuracy: 0.9764


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.4865 - Val Accuracy: 0.9640


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 92/100 - Loss: 1.9090 - Accuracy: 0.9767


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.8856 - Val Accuracy: 0.9640


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 93/100 - Loss: 1.8602 - Accuracy: 0.9752


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 3.1887 - Val Accuracy: 0.9652


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 94/100 - Loss: 1.8736 - Accuracy: 0.9768


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.8038 - Val Accuracy: 0.9675


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 95/100 - Loss: 1.8629 - Accuracy: 0.9755


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.7575 - Val Accuracy: 0.9612


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 96/100 - Loss: 1.8204 - Accuracy: 0.9747


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 2.9306 - Val Accuracy: 0.9606


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 97/100 - Loss: 1.8907 - Accuracy: 0.9747


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 3.0710 - Val Accuracy: 0.9629


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 98/100 - Loss: 1.8230 - Accuracy: 0.9759


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 3.0913 - Val Accuracy: 0.9675


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 99/100 - Loss: 1.8106 - Accuracy: 0.9765


  0%|          | 0/146 [00:00<?, ?it/s]

Val Loss: 3.5356 - Val Accuracy: 0.9652


Training:   0%|          | 0/1165 [00:00<?, ?it/s]

Epoch: 100/100 - Loss: 1.9373 - Accuracy: 0.9773


  0%|          | 0/146 [00:00<?, ?it/s]

[32m[I 2023-12-08 00:56:39,477][0m Trial 12 finished with value: 0.9674657583236694 and parameters: {'learning_rate': 9.105630444960548e-05, 'weight_decay': 0.00010020079013284928, 'epsilon': 9.386097456300994e-08, 'batch_size': 12, 'epochs': 100}. Best is trial 11 with value: 0.9682537913322449.[0m


Val Loss: 2.5620 - Val Accuracy: 0.9675
Learning rate: 0.00011019796072418936
Weight decay: 0.0002539256417359753
Epsilon: 9.302094477917222e-08
Batch size: 10
Number of epochs: 99


Epochs:   0%|          | 0/99 [00:00<?, ?it/s]

Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 1/99 - Loss: 6.7125 - Accuracy: 0.9048


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 4.4816 - Val Accuracy: 0.9554


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 2/99 - Loss: 3.9244 - Accuracy: 0.9559


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 4.0002 - Val Accuracy: 0.9589


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 3/99 - Loss: 3.4269 - Accuracy: 0.9599


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.1881 - Val Accuracy: 0.9640


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 4/99 - Loss: 3.4511 - Accuracy: 0.9631


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.3101 - Val Accuracy: 0.9611


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 5/99 - Loss: 3.3920 - Accuracy: 0.9642


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.4106 - Val Accuracy: 0.9623


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 6/99 - Loss: 3.3005 - Accuracy: 0.9639


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.8385 - Val Accuracy: 0.9640


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 7/99 - Loss: 3.2202 - Accuracy: 0.9644


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.6536 - Val Accuracy: 0.9600


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 8/99 - Loss: 3.2668 - Accuracy: 0.9662


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.4490 - Val Accuracy: 0.9657


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 9/99 - Loss: 3.1251 - Accuracy: 0.9670


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.4099 - Val Accuracy: 0.9674


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 10/99 - Loss: 3.0709 - Accuracy: 0.9667


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 2.9618 - Val Accuracy: 0.9617


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 11/99 - Loss: 2.9309 - Accuracy: 0.9677


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.1225 - Val Accuracy: 0.9674


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 12/99 - Loss: 3.0614 - Accuracy: 0.9669


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.8894 - Val Accuracy: 0.9629


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 13/99 - Loss: 3.0438 - Accuracy: 0.9677


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.4606 - Val Accuracy: 0.9629


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 14/99 - Loss: 2.9669 - Accuracy: 0.9703


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.5244 - Val Accuracy: 0.9657


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 15/99 - Loss: 2.8548 - Accuracy: 0.9673


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.6258 - Val Accuracy: 0.9629


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 16/99 - Loss: 2.7282 - Accuracy: 0.9685


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.8099 - Val Accuracy: 0.9646


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 17/99 - Loss: 2.8967 - Accuracy: 0.9692


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 4.2936 - Val Accuracy: 0.9646


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 18/99 - Loss: 3.1104 - Accuracy: 0.9692


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.2195 - Val Accuracy: 0.9646


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 19/99 - Loss: 3.0881 - Accuracy: 0.9698


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.8119 - Val Accuracy: 0.9646


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 20/99 - Loss: 3.0114 - Accuracy: 0.9705


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.8653 - Val Accuracy: 0.9646


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 21/99 - Loss: 2.8086 - Accuracy: 0.9709


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.9576 - Val Accuracy: 0.9669


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 22/99 - Loss: 2.8608 - Accuracy: 0.9695


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.9498 - Val Accuracy: 0.9617


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 23/99 - Loss: 3.0245 - Accuracy: 0.9704


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.8319 - Val Accuracy: 0.9646


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 24/99 - Loss: 2.9261 - Accuracy: 0.9695


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.9399 - Val Accuracy: 0.9657


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 25/99 - Loss: 3.0340 - Accuracy: 0.9700


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 4.1399 - Val Accuracy: 0.9651


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 26/99 - Loss: 2.8824 - Accuracy: 0.9700


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.4809 - Val Accuracy: 0.9686


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 27/99 - Loss: 2.7591 - Accuracy: 0.9717


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.6941 - Val Accuracy: 0.9634


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 28/99 - Loss: 2.8010 - Accuracy: 0.9710


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.6838 - Val Accuracy: 0.9629


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 29/99 - Loss: 2.9864 - Accuracy: 0.9710


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.5879 - Val Accuracy: 0.9611


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 30/99 - Loss: 2.8864 - Accuracy: 0.9696


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.8957 - Val Accuracy: 0.9577


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 31/99 - Loss: 2.9111 - Accuracy: 0.9710


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.7572 - Val Accuracy: 0.9709


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 32/99 - Loss: 2.9044 - Accuracy: 0.9717


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.5111 - Val Accuracy: 0.9657


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 33/99 - Loss: 2.6230 - Accuracy: 0.9731


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.2973 - Val Accuracy: 0.9657


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 34/99 - Loss: 2.9469 - Accuracy: 0.9709


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.5153 - Val Accuracy: 0.9686


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 35/99 - Loss: 2.7773 - Accuracy: 0.9717


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 4.2165 - Val Accuracy: 0.9680


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 36/99 - Loss: 2.7901 - Accuracy: 0.9721


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.4511 - Val Accuracy: 0.9680


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 37/99 - Loss: 2.6526 - Accuracy: 0.9715


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.8177 - Val Accuracy: 0.9669


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 38/99 - Loss: 2.8049 - Accuracy: 0.9722


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.6565 - Val Accuracy: 0.9634


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 39/99 - Loss: 2.8918 - Accuracy: 0.9727


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 4.2841 - Val Accuracy: 0.9680


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 40/99 - Loss: 3.0122 - Accuracy: 0.9725


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 4.2435 - Val Accuracy: 0.9680


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 41/99 - Loss: 2.9915 - Accuracy: 0.9741


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.7058 - Val Accuracy: 0.9600


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 42/99 - Loss: 2.8335 - Accuracy: 0.9731


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 4.2436 - Val Accuracy: 0.9623


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 43/99 - Loss: 2.9024 - Accuracy: 0.9726


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 4.1224 - Val Accuracy: 0.9634


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 44/99 - Loss: 2.8548 - Accuracy: 0.9733


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.5830 - Val Accuracy: 0.9669


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 45/99 - Loss: 2.7144 - Accuracy: 0.9719


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 4.3191 - Val Accuracy: 0.9623


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 46/99 - Loss: 2.9761 - Accuracy: 0.9737


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.4478 - Val Accuracy: 0.9691


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 47/99 - Loss: 2.7180 - Accuracy: 0.9744


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.9462 - Val Accuracy: 0.9583


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 48/99 - Loss: 2.7408 - Accuracy: 0.9737


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.7478 - Val Accuracy: 0.9657


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 49/99 - Loss: 2.8104 - Accuracy: 0.9716


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 4.3792 - Val Accuracy: 0.9640


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 50/99 - Loss: 2.9983 - Accuracy: 0.9727


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.8147 - Val Accuracy: 0.9691


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 51/99 - Loss: 2.6806 - Accuracy: 0.9721


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.5379 - Val Accuracy: 0.9674


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 52/99 - Loss: 2.8803 - Accuracy: 0.9738


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.5090 - Val Accuracy: 0.9646


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 53/99 - Loss: 2.7877 - Accuracy: 0.9729


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.3771 - Val Accuracy: 0.9623


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 54/99 - Loss: 2.7197 - Accuracy: 0.9737


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.9292 - Val Accuracy: 0.9629


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 55/99 - Loss: 2.8458 - Accuracy: 0.9726


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 4.5524 - Val Accuracy: 0.9600


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 56/99 - Loss: 2.9321 - Accuracy: 0.9738


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 4.1663 - Val Accuracy: 0.9623


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 57/99 - Loss: 2.7965 - Accuracy: 0.9732


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 4.3487 - Val Accuracy: 0.9651


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 58/99 - Loss: 2.7717 - Accuracy: 0.9734


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 4.0478 - Val Accuracy: 0.9669


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 59/99 - Loss: 3.0110 - Accuracy: 0.9741


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.7915 - Val Accuracy: 0.9611


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 60/99 - Loss: 2.9869 - Accuracy: 0.9739


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 4.7986 - Val Accuracy: 0.9651


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 61/99 - Loss: 2.6241 - Accuracy: 0.9732


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 4.4225 - Val Accuracy: 0.9634


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 62/99 - Loss: 2.7745 - Accuracy: 0.9745


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 4.4675 - Val Accuracy: 0.9651


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 63/99 - Loss: 2.8902 - Accuracy: 0.9725


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 4.0517 - Val Accuracy: 0.9577


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 64/99 - Loss: 2.8990 - Accuracy: 0.9740


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 4.7373 - Val Accuracy: 0.9651


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 65/99 - Loss: 2.8999 - Accuracy: 0.9745


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.7774 - Val Accuracy: 0.9629


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 66/99 - Loss: 2.7295 - Accuracy: 0.9747


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.8819 - Val Accuracy: 0.9646


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 67/99 - Loss: 2.7238 - Accuracy: 0.9739


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.9398 - Val Accuracy: 0.9634


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 68/99 - Loss: 2.7822 - Accuracy: 0.9729


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.4432 - Val Accuracy: 0.9663


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 69/99 - Loss: 2.8907 - Accuracy: 0.9750


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.9610 - Val Accuracy: 0.9714


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 70/99 - Loss: 2.7803 - Accuracy: 0.9737


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.6667 - Val Accuracy: 0.9680


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 71/99 - Loss: 2.8294 - Accuracy: 0.9735


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 4.0878 - Val Accuracy: 0.9669


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 72/99 - Loss: 2.7207 - Accuracy: 0.9725


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.9023 - Val Accuracy: 0.9640


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 73/99 - Loss: 2.7621 - Accuracy: 0.9742


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.7534 - Val Accuracy: 0.9640


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 74/99 - Loss: 3.0170 - Accuracy: 0.9745


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.3102 - Val Accuracy: 0.9634


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 75/99 - Loss: 2.7893 - Accuracy: 0.9754


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 4.1569 - Val Accuracy: 0.9657


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 76/99 - Loss: 2.6956 - Accuracy: 0.9745


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 4.4279 - Val Accuracy: 0.9640


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 77/99 - Loss: 2.8284 - Accuracy: 0.9738


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.9206 - Val Accuracy: 0.9617


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 78/99 - Loss: 2.7875 - Accuracy: 0.9750


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 4.5454 - Val Accuracy: 0.9606


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 79/99 - Loss: 2.8048 - Accuracy: 0.9755


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.8138 - Val Accuracy: 0.9611


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 80/99 - Loss: 2.7337 - Accuracy: 0.9741


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.8898 - Val Accuracy: 0.9646


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 81/99 - Loss: 2.8226 - Accuracy: 0.9758


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 4.2200 - Val Accuracy: 0.9686


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 82/99 - Loss: 2.7845 - Accuracy: 0.9745


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 4.4999 - Val Accuracy: 0.9611


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 83/99 - Loss: 2.7705 - Accuracy: 0.9758


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 4.2214 - Val Accuracy: 0.9589


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 84/99 - Loss: 2.9042 - Accuracy: 0.9745


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 4.1558 - Val Accuracy: 0.9583


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 85/99 - Loss: 2.7347 - Accuracy: 0.9755


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 4.4017 - Val Accuracy: 0.9600


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 86/99 - Loss: 2.6969 - Accuracy: 0.9765


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 5.1590 - Val Accuracy: 0.9629


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 87/99 - Loss: 2.8050 - Accuracy: 0.9747


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 5.5168 - Val Accuracy: 0.9657


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 88/99 - Loss: 2.7364 - Accuracy: 0.9755


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.8040 - Val Accuracy: 0.9617


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 89/99 - Loss: 2.7659 - Accuracy: 0.9761


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 4.2538 - Val Accuracy: 0.9674


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 90/99 - Loss: 2.9568 - Accuracy: 0.9761


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 4.2703 - Val Accuracy: 0.9617


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 91/99 - Loss: 2.9065 - Accuracy: 0.9763


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 4.5218 - Val Accuracy: 0.9663


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 92/99 - Loss: 2.7751 - Accuracy: 0.9756


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 4.0756 - Val Accuracy: 0.9634


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 93/99 - Loss: 2.8491 - Accuracy: 0.9746


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.7314 - Val Accuracy: 0.9617


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 94/99 - Loss: 2.8686 - Accuracy: 0.9759


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 4.2320 - Val Accuracy: 0.9651


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 95/99 - Loss: 2.6834 - Accuracy: 0.9749


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 3.7869 - Val Accuracy: 0.9651


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 96/99 - Loss: 2.7337 - Accuracy: 0.9745


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 4.0171 - Val Accuracy: 0.9657


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 97/99 - Loss: 2.8180 - Accuracy: 0.9740


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 4.0374 - Val Accuracy: 0.9680


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 98/99 - Loss: 2.8209 - Accuracy: 0.9767


  0%|          | 0/175 [00:00<?, ?it/s]

Val Loss: 4.3079 - Val Accuracy: 0.9691


Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 99/99 - Loss: 2.8454 - Accuracy: 0.9753


  0%|          | 0/175 [00:00<?, ?it/s]

[32m[I 2023-12-08 02:20:53,102][0m Trial 13 finished with value: 0.9645713567733765 and parameters: {'learning_rate': 0.00011019796072418936, 'weight_decay': 0.0002539256417359753, 'epsilon': 9.302094477917222e-08, 'batch_size': 10, 'epochs': 99}. Best is trial 11 with value: 0.9682537913322449.[0m


Val Loss: 3.3470 - Val Accuracy: 0.9646
Learning rate: 0.0001183861142623738
Weight decay: 0.0002809082780181669
Epsilon: 9.982261670956841e-08
Batch size: 24
Number of epochs: 99


Epochs:   0%|          | 0/99 [00:00<?, ?it/s]

Training:   0%|          | 0/583 [00:00<?, ?it/s]

Epoch: 1/99 - Loss: 7.0520 - Accuracy: 0.8885


  0%|          | 0/73 [00:00<?, ?it/s]

[32m[I 2023-12-08 02:21:45,698][0m Trial 14 pruned. [0m


Val Loss: 4.4463 - Val Accuracy: 0.9509
Learning rate: 9.60320180047481e-05
Weight decay: 0.0001016853419148899
Epsilon: 5.851166464717649e-08
Batch size: 10
Number of epochs: 85


Epochs:   0%|          | 0/85 [00:00<?, ?it/s]

Training:   0%|          | 0/1398 [00:00<?, ?it/s]

Epoch: 1/85 - Loss: 6.9363 - Accuracy: 0.8991


  0%|          | 0/175 [00:00<?, ?it/s]

[32m[I 2023-12-08 02:22:34,496][0m Trial 15 pruned. [0m


Val Loss: 3.9748 - Val Accuracy: 0.9497
Learning rate: 0.00024325414126856603
Weight decay: 0.0013730307539964275
Epsilon: 1.5656440251127447e-08
Batch size: 30
Number of epochs: 90


Epochs:   0%|          | 0/90 [00:00<?, ?it/s]

Training:   0%|          | 0/466 [00:00<?, ?it/s]

Epoch: 1/90 - Loss: 6.6368 - Accuracy: 0.9205


  0%|          | 0/59 [00:00<?, ?it/s]

Val Loss: 4.1107 - Val Accuracy: 0.9593


Training:   0%|          | 0/466 [00:00<?, ?it/s]

Epoch: 2/90 - Loss: 4.1634 - Accuracy: 0.9555


  0%|          | 0/59 [00:00<?, ?it/s]

Val Loss: 4.2030 - Val Accuracy: 0.9582


Training:   0%|          | 0/466 [00:00<?, ?it/s]

Epoch: 3/90 - Loss: 3.8378 - Accuracy: 0.9615


  0%|          | 0/59 [00:00<?, ?it/s]

Val Loss: 3.7951 - Val Accuracy: 0.9565


Training:   0%|          | 0/466 [00:00<?, ?it/s]

Epoch: 4/90 - Loss: 3.7866 - Accuracy: 0.9621


  0%|          | 0/59 [00:00<?, ?it/s]

[32m[I 2023-12-08 02:25:47,367][0m Trial 16 pruned. [0m


Val Loss: 3.5090 - Val Accuracy: 0.9458
Learning rate: 0.001631705016904602
Weight decay: 0.00040356064167022695
Epsilon: 5.447865838710331e-08
Batch size: 31
Number of epochs: 35


Epochs:   0%|          | 0/35 [00:00<?, ?it/s]

Training:   0%|          | 0/451 [00:00<?, ?it/s]

Epoch: 1/35 - Loss: 39.9911 - Accuracy: 0.9272


  0%|          | 0/57 [00:00<?, ?it/s]

[32m[I 2023-12-08 02:26:34,459][0m Trial 17 pruned. [0m


Val Loss: 80.5494 - Val Accuracy: 0.9394
Learning rate: 5.8012230269608875e-05
Weight decay: 0.00016233818340921622
Epsilon: 1.8106294902607915e-08
Batch size: 16
Number of epochs: 80


Epochs:   0%|          | 0/80 [00:00<?, ?it/s]

Training:   0%|          | 0/874 [00:00<?, ?it/s]

Epoch: 1/80 - Loss: 8.3538 - Accuracy: 0.8640


  0%|          | 0/110 [00:00<?, ?it/s]

[32m[I 2023-12-08 02:27:19,968][0m Trial 18 pruned. [0m


Val Loss: 4.9684 - Val Accuracy: 0.9449
Learning rate: 4.653162730205759e-05
Weight decay: 0.002093589254055245
Epsilon: 6.492016355752239e-08
Batch size: 40
Number of epochs: 99


Epochs:   0%|          | 0/99 [00:00<?, ?it/s]

Training:   0%|          | 0/350 [00:00<?, ?it/s]

Epoch: 1/99 - Loss: 10.7893 - Accuracy: 0.8068


  0%|          | 0/44 [00:00<?, ?it/s]

[32m[I 2023-12-08 02:28:10,382][0m Trial 19 pruned. [0m


Val Loss: 6.7747 - Val Accuracy: 0.9297
Learning rate: 0.000750387169630592
Weight decay: 0.00034339289435580494
Epsilon: 9.81097933918493e-09
Batch size: 21
Number of epochs: 44


Epochs:   0%|          | 0/44 [00:00<?, ?it/s]

Training:   0%|          | 0/666 [00:00<?, ?it/s]

Epoch: 1/44 - Loss: 14.5199 - Accuracy: 0.9282


  0%|          | 0/84 [00:00<?, ?it/s]

[32m[I 2023-12-08 02:28:58,746][0m Trial 20 pruned. [0m


Val Loss: 16.8204 - Val Accuracy: 0.9461

Study statistics: 
  Number of finished trials:  21
  Number of pruned trials:  12
  Number of complete trials:  8


In [23]:
# Summarise the winning trial of the Optuna study: its objective value
# followed by one line per hyperparameter it was run with.
print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)

print("  Params: ")
param_line = "    {}: {}"
for param_item in trial.params.items():
    # Each item is a (name, value) pair; unpack it straight into the template.
    print(param_line.format(*param_item))

Best trial:
  Value:  0.9682537913322449
  Params: 
    batch_size: 21
    epochs: 95
    epsilon: 8.227661069131743e-08
    learning_rate: 0.00017348444622346392
    weight_decay: 0.00011451862597352213


In [None]:
# ViT P12-S8 Triplet Mean
#
# Best trial:
#   Value:  0.9682537913322449
#   Params:
#     batch_size: 21
#     epochs: 95
#     epsilon: 8.227661069131743e-08
#     learning_rate: 0.00017348444622346392
#     weight_decay: 0.00011451862597352213