In [1]:
import os
import random
import pandas as pd
import numpy as np
import mxnet as mx
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as opt
from torch.utils.data import Dataset, DataLoader
from pytorch_metric_learning import losses
from einops import rearrange, repeat
import optuna
from optuna.trial import TrialState
from tqdm import tqdm

In [2]:
def file_to_embed(embeds, file):
    emb = []
    for f in file:
        emb.append(embeds[f][0])
    return torch.stack(emb)

In [3]:
MIN_NUM_PATCHES = 16

In [4]:
device = torch.device("cuda:2" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=2)

In [5]:
class AdienceDataset(Dataset):
    def __init__(self, annot_file, img_dir):
        self.img_lbls = pd.read_csv(annot_file, header=None)
        self.img_dir = img_dir
    
    def __len__(self):
        return len(self.img_lbls)
    
    def __getitem__(self, idx):
        img_file = self.img_lbls.iloc[idx, 0]
        img_path = os.path.join(self.img_dir, img_file)
        image = mx.image.imread(img_path)
        if image.shape[1] != 112:
            image = mx.image.resize_short(image, 112)
        image = mx.nd.transpose(image, axes=(2,0,1))
        image = torch.tensor(image.asnumpy()).type(torch.FloatTensor)
        label = self.img_lbls.iloc[idx, 1]

        return image, label, img_file

In [6]:
train_data = AdienceDataset("../train.csv", "../cropped_Adience/")
val_data = AdienceDataset("../val.csv", "../cropped_Adience/")

In [7]:
class CosFace(nn.Module):
    r"""Implement of CosFace (https://arxiv.org/pdf/1801.09414.pdf):
    Args:
        in_features: size of each input sample
        out_features: size of each output sample
        device_id: the ID of GPU where the model will be trained by model parallel.
                       if device_id=None, it will be trained on CPU without model parallel.
        s: norm of input feature
        m: margin
        cos(theta)-m
    """

    def __init__(self, in_features, out_features, device_id, s=64.0, m=0.35):
        super(CosFace, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.device_id = device_id
        self.s = s
        self.m = m
        print("self.device_id", self.device_id)
        self.weight = nn.Parameter(torch.FloatTensor(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)

    def forward(self, input, label):
        # --------------------------- cos(theta) & phi(theta) ---------------------------

        if self.device_id == None:
            cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        else:
            x = input
            sub_weights = torch.chunk(self.weight, len(self.device_id), dim=0)
            temp_x = x.cuda(self.device_id[0])
            weight = sub_weights[0].cuda(self.device_id[0])
            cosine = F.linear(F.normalize(temp_x), F.normalize(weight))
            for i in range(1, len(self.device_id)):
                temp_x = x.cuda(self.device_id[i])
                weight = sub_weights[i].cuda(self.device_id[i])
                cosine = torch.cat((cosine, F.linear(F.normalize(temp_x), F.normalize(weight)).cuda(self.device_id[0])),
                                   dim=1)
        phi = cosine - self.m
        # --------------------------- convert label to one-hot ---------------------------
        one_hot = torch.zeros(cosine.size())
        if self.device_id != None:
            one_hot = one_hot.cuda(self.device_id[0])
        # one_hot = one_hot.cuda() if cosine.is_cuda else one_hot

        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        # -------------torch.where(out_i = {x_i if condition_i else y_i) -------------
        output = (one_hot * phi) + (
                    (1.0 - one_hot) * cosine)  # you can use torch.where if your torch.__version__ is 0.4
        output *= self.s

        return output

    def __repr__(self):
        return self.__class__.__name__ + '(' \
               + 'in_features = ' + str(self.in_features) \
               + ', out_features = ' + str(self.out_features) \
               + ', s = ' + str(self.s) \
               + ', m = ' + str(self.m) + ')'

In [8]:
class Residual(nn.Module):
    def __init__(self, fn):
        super().__init__()
        self.fn = fn
    def forward(self, x, **kwargs):
        return self.fn(x, **kwargs) + x

In [9]:
class PreNorm(nn.Module):
    def __init__(self, dim, fn):
        super().__init__()
        self.norm = nn.LayerNorm(dim)
        self.fn = fn
    def forward(self, x, **kwargs):
        return self.fn(self.norm(x), **kwargs)

In [10]:
class FeedForward(nn.Module):
    def __init__(self, dim, hidden_dim, dropout = 0.):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(dim, hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, dim),
            nn.Dropout(dropout)
        )
    def forward(self, x):
        return self.net(x)

In [11]:
class Attention(nn.Module):
    def __init__(self, dim, heads = 8, dim_head = 64, dropout = 0.):
        super().__init__()
        inner_dim = dim_head *  heads
        self.heads = heads
        self.scale = dim ** -0.5

        self.to_qkv = nn.Linear(dim, inner_dim * 3, bias = False)
        self.to_out = nn.Sequential(
            nn.Linear(inner_dim, dim),
            nn.Dropout(dropout)
        )

    def forward(self, x, mask = None):
        b, n, _, h = *x.shape, self.heads
        qkv = self.to_qkv(x).chunk(3, dim = -1)

        q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> b h n d', h = h), qkv)
        dots = torch.einsum('bhid,bhjd->bhij', q, k) * self.scale
        mask_value = -torch.finfo(dots.dtype).max
        #embed()
        if mask is not None:
            mask = F.pad(mask.flatten(1), (1, 0), value = True)
            assert mask.shape[-1] == dots.shape[-1], 'mask has incorrect dimensions'
            mask = mask[:, None, :] * mask[:, :, None]
            dots.masked_fill_(~mask, mask_value)
            del mask

        attn = dots.softmax(dim=-1)

        out = torch.einsum('bhij,bhjd->bhid', attn, v)
        out = rearrange(out, 'b h n d -> b n (h d)')
        out =  self.to_out(out)

        return out

In [12]:
class Transformer(nn.Module):
    def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout):
        super().__init__()
        self.layers = nn.ModuleList([])
        for _ in range(depth):
            self.layers.append(nn.ModuleList([
                Residual(PreNorm(dim, Attention(dim, heads = heads, dim_head = dim_head, dropout = dropout))),
                Residual(PreNorm(dim, FeedForward(dim, mlp_dim, dropout = dropout)))
            ]))
    def forward(self, x, mask = None):
        for attn, ff in self.layers:
            x = attn(x, mask = mask)
            #embed()
            x = ff(x)
        return x

In [13]:
class ViT_face(nn.Module):
    def __init__(self, *, loss_type, GPU_ID, num_class, image_size, patch_size, dim, depth, heads, mlp_dim, pool = 'mean', channels = 3, dim_head = 64, dropout = 0., emb_dropout = 0.):
        super().__init__()
        assert image_size % patch_size == 0, 'Image dimensions must be divisible by the patch size.'
        num_patches = (image_size // patch_size) ** 2
        patch_dim = channels * patch_size ** 2
        assert num_patches > MIN_NUM_PATCHES, f'your number of patches ({num_patches}) is way too small for attention to be effective (at least 16). Try decreasing your patch size'
        assert pool in {'cls', 'mean'}, 'pool type must be either cls (cls token) or mean (mean pooling)'

        self.patch_size = patch_size

        self.pos_embedding = nn.Parameter(torch.randn(1, num_patches + 1, dim))
        self.patch_to_embedding = nn.Linear(patch_dim, dim)
        self.cls_token = nn.Parameter(torch.randn(1, 1, dim))
        self.dropout = nn.Dropout(emb_dropout)

        self.transformer = Transformer(dim, depth, heads, dim_head, mlp_dim, dropout)

        self.pool = pool
        self.to_latent = nn.Identity()

        self.mlp_head = nn.Sequential(
            nn.LayerNorm(dim),
        )
        self.loss_type = loss_type
        self.GPU_ID = GPU_ID
        if self.loss_type == 'None':
            print("no loss for vit_face")
        else:
            if self.loss_type == 'CosFace':
                self.loss = CosFace(in_features=dim, out_features=num_class, device_id=self.GPU_ID)

    def forward(self, img, label=None, mask=None):
        p = self.patch_size
        
        x = rearrange(img, 'b c (h p1) (w p2) -> b (h w) (p1 p2 c)', p1 = p, p2 = p)
        x = self.patch_to_embedding(x)
        b, n, _ = x.shape

        cls_tokens = repeat(self.cls_token, '() n d -> b n d', b = b)
        x = torch.cat((cls_tokens, x), dim=1)
        x += self.pos_embedding[:, :(n + 1)]
        x = self.dropout(x)
        x = self.transformer(x, mask)

        # y = x[:, 0]
        z = x[:, 1:].mean(dim = 1)

        # y = self.to_latent(y)
        # emb_y = self.mlp_head(y)
        z = self.to_latent(z)
        emb_z = self.mlp_head(z)
        # emb = torch.cat((emb_y, emb_z), dim=1)
        emb = emb_z
        if label is not None:
            x = self.loss(emb, label)
            return x, emb
        else:
            return emb

In [14]:
class ViT_plus(nn.Module):
    def __init__(self):
        super(ViT_plus, self).__init__()
        
        self.fc1 = nn.Linear(in_features=512, out_features=512)
        self.fc2 = nn.Linear(in_features=512, out_features=2)
        
    def forward(self, x):
        x = self.fc1(x)
        x_cosface = x
        x_classification = self.fc2(x)
        
        return x_cosface, x_classification

In [15]:
model = ViT_face(
            image_size=112,
            patch_size=8,
            loss_type='CosFace',
            GPU_ID= [device],
            num_class=93431,
            dim=512,
            depth=20,
            heads=8,
            mlp_dim=2048,
            dropout=0.1,
            emb_dropout=0.1
        ).to(device)
model.load_state_dict(
    torch.load("../Backbone_VIT_Epoch_2_Batch_20000_Time_2021-01-12-16-48_checkpoint.pth", map_location=device)
)

self.device_id [device(type='cuda', index=2)]


<All keys matched successfully>

In [16]:
for param in model.parameters():
    param.requires_grad = False

In [17]:
embeds = {}
model.eval()

with torch.no_grad():
    for img, label, file in train_data:
        img = img.to(device)
        embeds[file] = model(torch.unsqueeze(img, 0))

    for img, label, file in val_data:
        img = img.to(device)
        embeds[file] = model(torch.unsqueeze(img, 0))

In [18]:
best_accu = 0.9245803356170654
def objective(trial):
    model_xtr = ViT_plus().to(device)
    
    loss_lr = trial.suggest_float("loss_learning_rate", 1e-4, 1e-2, log=True)
    cos_margin = losses.CosFaceLoss(2, 512).to(device)
    loss_optimizer = opt.AdamW(cos_margin.parameters(), lr=loss_lr)
    
    lr = trial.suggest_float("learning_rate", 1e-5, 1e-1, log=True)
    wd = trial.suggest_float('weight_decay', 1e-4, 1e-2, log=True)
    eps = trial.suggest_float("epsilon", 1e-9, 1e-7, log=True)
    optimizer = opt.AdamW(model_xtr.parameters(), lr=lr, eps=eps, weight_decay=wd)
    
    criterion = nn.CrossEntropyLoss()
    
    batch_size = trial.suggest_int('batch_size', 50, 300)
    num_epochs = trial.suggest_int('epochs', 10, 100)
    
    print("Learning rate for Loss: "+ str(loss_lr))
    print("Learning rate: "+ str(lr))
    print("Weight decay: "+ str(wd))
    print("Epsilon: "+ str(eps))
    print("Batch size: "+ str(batch_size))
    print("Number of epochs: "+ str(num_epochs))
    
    for epoch in tqdm(range(num_epochs), desc="Epochs"):
        train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=4)
        val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False, num_workers=4)
        
        # training loop
        running_loss = []
        running_accu = []
        
        model_xtr.train()
        for img, label, file in tqdm(train_loader, desc="Training", leave=False):
            img, label = img.to(device), label.to(device)

            x = file_to_embed(embeds, file)
            
            optimizer.zero_grad()
            embed, output = model_xtr(x)
            
            pred = torch.argmax(output, 1)
            accuracy = torch.eq(pred, label).sum() / len(img)

            class_loss = criterion(output, label)
            cos_loss = cos_margin(embed, label)
            loss = class_loss + cos_loss
            loss.backward()
            loss_optimizer.step()
            optimizer.step()

            running_accu.append(accuracy.cpu().detach().numpy())
            running_loss.append(loss.cpu().detach().numpy())
        print("Epoch: {}/{} - Loss: {:.4f} - Accuracy: {:.4f}".format(epoch+1, num_epochs, np.mean(running_loss), np.mean(running_accu)))
        
        # validation loop
        val_loss = []
        val_accu = []

        model_xtr.eval()
        with torch.no_grad():
            for img, label, file in tqdm(val_loader):
                img, label = img.to(device), label.to(device)
                
                x = file_to_embed(embeds, file)
                
                embed, output = model_xtr(x)
                
                pred = torch.argmax(output, 1)
                accuracy = torch.eq(pred, label).sum() / len(img)
                
                class_loss = criterion(output, label)
                cos_loss = cos_margin(embed, label)
                loss = class_loss + cos_loss
                
                val_accu.append(accuracy.cpu().detach().numpy())
                val_loss.append(loss.cpu().detach().numpy())
        val_accu = np.mean(val_accu)
        val_loss = np.mean(val_loss)
        print("Val Loss: {:.4f} - Val Accuracy: {:.4f}".format(val_loss, val_accu))
        
        trial.report(val_accu, epoch)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()
    
    global best_accu
    if val_accu > best_accu:
        best_accu = val_accu
        print("Saving best model...")
        torch.save(model_xtr.state_dict(), "../vit_8-8_cosface_mean_only.pt")
            
    return val_accu

In [21]:
study = optuna.create_study(direction='maximize',
                            study_name='cosface-8-8-mean-only-vit-study',
                            storage='sqlite:///study2.db',
                            load_if_exists=True)
study.optimize(objective, n_trials=5)

pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

# Display the study statistics
print("\nStudy statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

[32m[I 2023-12-14 11:01:35,025][0m Using an existing study with name 'cosface-8-8-mean-only-vit-study' instead of creating a new one.[0m


Learning rate for Loss: 0.006668550287752485
Learning rate: 0.018960297938816782
Weight decay: 0.0039344672822713995
Epsilon: 5.376869373261531e-09
Batch size: 132
Number of epochs: 44


Epochs:   0%|          | 0/44 [00:00<?, ?it/s]
Training:   0%|          | 0/106 [00:00<?, ?it/s][A
Training:   1%|          | 1/106 [00:01<02:09,  1.23s/it][A
Training:   5%|▍         | 5/106 [00:02<00:39,  2.58it/s][A
Training:   8%|▊         | 9/106 [00:03<00:28,  3.46it/s][A
Training:  11%|█▏        | 12/106 [00:03<00:18,  5.07it/s][A
Training:  13%|█▎        | 14/106 [00:03<00:22,  4.07it/s][A
Training:  15%|█▌        | 16/106 [00:04<00:17,  5.12it/s][A
Training:  17%|█▋        | 18/106 [00:04<00:20,  4.29it/s][A
Training:  20%|█▉        | 21/106 [00:05<00:20,  4.22it/s][A
Training:  22%|██▏       | 23/106 [00:05<00:15,  5.24it/s][A
Training:  24%|██▎       | 25/106 [00:06<00:20,  3.90it/s][A
Training:  26%|██▋       | 28/106 [00:06<00:13,  5.64it/s][A
Training:  28%|██▊       | 30/106 [00:07<00:18,  4.18it/s][A
Training:  31%|███       | 33/106 [00:08<00:18,  3.94it/s][A
Training:  33%|███▎      | 35/106 [00:08<00:14,  4.75it/s][A
Training:  35%|███▍      | 37/106 [

Epoch: 1/44 - Loss: 18.3927 - Accuracy: 0.7638



  0%|          | 0/14 [00:00<?, ?it/s][A
  7%|▋         | 1/14 [00:01<00:19,  1.47s/it][A
 21%|██▏       | 3/14 [00:01<00:05,  2.17it/s][A
 36%|███▌      | 5/14 [00:02<00:04,  2.24it/s][A
 50%|█████     | 7/14 [00:02<00:01,  3.56it/s][A
 64%|██████▍   | 9/14 [00:03<00:01,  2.78it/s][A
 79%|███████▊  | 11/14 [00:03<00:00,  3.78it/s][A
100%|██████████| 14/14 [00:04<00:00,  3.03it/s][A
Epochs:   2%|▏         | 1/44 [00:29<21:13, 29.62s/it]

Val Loss: 5.6499 - Val Accuracy: 0.8965



Training:   0%|          | 0/106 [00:00<?, ?it/s][A
Training:   1%|          | 1/106 [00:01<02:57,  1.69s/it][A
Training:   4%|▍         | 4/106 [00:01<00:35,  2.86it/s][A
Training:   6%|▌         | 6/106 [00:02<00:44,  2.27it/s][A
Training:   8%|▊         | 8/106 [00:03<00:29,  3.33it/s][A
Training:   9%|▉         | 10/106 [00:04<00:37,  2.56it/s][A
Training:  11%|█▏        | 12/106 [00:04<00:26,  3.55it/s][A
Training:  13%|█▎        | 14/106 [00:05<00:32,  2.81it/s][A
Training:  15%|█▌        | 16/106 [00:05<00:23,  3.81it/s][A
Training:  17%|█▋        | 18/106 [00:06<00:28,  3.14it/s][A
Training:  20%|█▉        | 21/106 [00:07<00:29,  2.92it/s][A
Training:  22%|██▏       | 23/106 [00:07<00:23,  3.59it/s][A
Training:  24%|██▎       | 25/106 [00:08<00:24,  3.37it/s][A
Training:  25%|██▍       | 26/106 [00:08<00:21,  3.69it/s][A
Training:  25%|██▌       | 27/106 [00:08<00:19,  4.11it/s][A
Training:  26%|██▋       | 28/106 [00:08<00:17,  4.51it/s][A
Training:  27%|██▋  

Epoch: 2/44 - Loss: 6.2852 - Accuracy: 0.8987



  0%|          | 0/14 [00:00<?, ?it/s][A
  7%|▋         | 1/14 [00:01<00:20,  1.59s/it][A
 21%|██▏       | 3/14 [00:01<00:05,  2.14it/s][A
 36%|███▌      | 5/14 [00:02<00:04,  1.88it/s][A
 43%|████▎     | 6/14 [00:03<00:03,  2.39it/s][A
 64%|██████▍   | 9/14 [00:04<00:01,  2.72it/s][A
 79%|███████▊  | 11/14 [00:04<00:00,  3.77it/s][A
100%|██████████| 14/14 [00:04<00:00,  2.81it/s][A
Epochs:   5%|▍         | 2/44 [01:05<23:24, 33.45s/it]

Val Loss: 5.2344 - Val Accuracy: 0.9057



Training:   0%|          | 0/106 [00:00<?, ?it/s][A
Training:   1%|          | 1/106 [00:01<02:27,  1.40s/it][A
Training:   2%|▏         | 2/106 [00:01<01:19,  1.31it/s][A
Training:   3%|▎         | 3/106 [00:01<00:48,  2.13it/s][A
Training:   4%|▍         | 4/106 [00:01<00:33,  3.08it/s][A
Training:   5%|▍         | 5/106 [00:02<00:39,  2.53it/s][A
Training:   6%|▌         | 6/106 [00:02<00:40,  2.49it/s][A
Training:   8%|▊         | 9/106 [00:03<00:33,  2.93it/s][A
Training:  10%|█         | 11/106 [00:03<00:22,  4.20it/s][A
Training:  12%|█▏        | 13/106 [00:04<00:30,  3.09it/s][A
Training:  14%|█▍        | 15/106 [00:05<00:23,  3.84it/s][A
Training:  15%|█▌        | 16/106 [00:05<00:21,  4.18it/s][A
Training:  16%|█▌        | 17/106 [00:05<00:28,  3.09it/s][A
Training:  18%|█▊        | 19/106 [00:06<00:20,  4.19it/s][A
Training:  19%|█▉        | 20/106 [00:06<00:18,  4.73it/s][A
Training:  20%|█▉        | 21/106 [00:06<00:26,  3.16it/s][A
Training:  21%|██      

Epoch: 3/44 - Loss: 4.9630 - Accuracy: 0.9167



  0%|          | 0/14 [00:00<?, ?it/s][A
  7%|▋         | 1/14 [00:01<00:18,  1.46s/it][A
 21%|██▏       | 3/14 [00:01<00:05,  2.06it/s][A
 36%|███▌      | 5/14 [00:02<00:03,  2.34it/s][A
 50%|█████     | 7/14 [00:02<00:01,  3.57it/s][A
 57%|█████▋    | 8/14 [00:02<00:01,  4.05it/s][A
 64%|██████▍   | 9/14 [00:03<00:01,  2.72it/s][A
 71%|███████▏  | 10/14 [00:03<00:01,  3.32it/s][A
 86%|████████▌ | 12/14 [00:03<00:00,  4.66it/s][A
100%|██████████| 14/14 [00:04<00:00,  3.09it/s][A
Epochs:   7%|▋         | 3/44 [01:38<22:42, 33.23s/it]

Val Loss: 5.4498 - Val Accuracy: 0.9171



Training:   0%|          | 0/106 [00:00<?, ?it/s][A
Training:   1%|          | 1/106 [00:01<02:51,  1.63s/it][A
Training:   2%|▏         | 2/106 [00:01<01:18,  1.32it/s][A
Training:   3%|▎         | 3/106 [00:01<00:49,  2.08it/s][A
Training:   5%|▍         | 5/106 [00:02<00:39,  2.56it/s][A
Training:   6%|▌         | 6/106 [00:02<00:34,  2.91it/s][A
Training:   8%|▊         | 8/106 [00:02<00:21,  4.53it/s][A
Training:   8%|▊         | 9/106 [00:03<00:33,  2.93it/s][A
Training:   9%|▉         | 10/106 [00:03<00:29,  3.22it/s][A
Training:  11%|█▏        | 12/106 [00:04<00:19,  4.84it/s][A
Training:  12%|█▏        | 13/106 [00:04<00:33,  2.80it/s][A
Training:  14%|█▍        | 15/106 [00:04<00:21,  4.22it/s][A
Training:  16%|█▌        | 17/106 [00:06<00:29,  2.97it/s][A
Training:  17%|█▋        | 18/106 [00:06<00:25,  3.46it/s][A
Training:  19%|█▉        | 20/106 [00:06<00:17,  4.97it/s][A
Training:  21%|██        | 22/106 [00:06<00:22,  3.76it/s][A
Training:  22%|██▏     

Epoch: 4/44 - Loss: 4.7682 - Accuracy: 0.9166



  0%|          | 0/14 [00:00<?, ?it/s][A
  7%|▋         | 1/14 [00:01<00:17,  1.31s/it][A
 21%|██▏       | 3/14 [00:01<00:04,  2.62it/s][A
 36%|███▌      | 5/14 [00:02<00:03,  2.42it/s][A
 57%|█████▋    | 8/14 [00:02<00:01,  4.15it/s][A
 64%|██████▍   | 9/14 [00:03<00:01,  2.86it/s][A
 79%|███████▊  | 11/14 [00:03<00:00,  3.94it/s][A
100%|██████████| 14/14 [00:04<00:00,  3.19it/s][A
Epochs:   9%|▉         | 4/44 [02:13<22:27, 33.70s/it]

Val Loss: 4.5994 - Val Accuracy: 0.9203



Training:   0%|          | 0/106 [00:00<?, ?it/s][A
Training:   1%|          | 1/106 [00:01<02:31,  1.45s/it][A
Training:   3%|▎         | 3/106 [00:01<00:43,  2.37it/s][A
Training:   5%|▍         | 5/106 [00:02<00:40,  2.47it/s][A
Training:   7%|▋         | 7/106 [00:02<00:25,  3.86it/s][A
Training:   8%|▊         | 9/106 [00:03<00:29,  3.30it/s][A
Training:  10%|█         | 11/106 [00:03<00:23,  4.06it/s][A
Training:  12%|█▏        | 13/106 [00:04<00:26,  3.54it/s][A
Training:  14%|█▍        | 15/106 [00:04<00:20,  4.39it/s][A
Training:  16%|█▌        | 17/106 [00:05<00:25,  3.52it/s][A
Training:  20%|█▉        | 21/106 [00:06<00:22,  3.82it/s][A
Training:  23%|██▎       | 24/106 [00:06<00:15,  5.41it/s][A
Training:  25%|██▍       | 26/106 [00:07<00:19,  4.07it/s][A
Training:  27%|██▋       | 29/106 [00:08<00:19,  3.89it/s][A
Training:  29%|██▉       | 31/106 [00:08<00:15,  4.82it/s][A
Training:  31%|███       | 33/106 [00:09<00:19,  3.70it/s][A
Training:  33%|███▎  

Epoch: 5/44 - Loss: 4.6152 - Accuracy: 0.9183



  0%|          | 0/14 [00:00<?, ?it/s][A
  7%|▋         | 1/14 [00:01<00:16,  1.27s/it][A
 36%|███▌      | 5/14 [00:02<00:03,  2.56it/s][A
 64%|██████▍   | 9/14 [00:03<00:01,  3.48it/s][A
100%|██████████| 14/14 [00:03<00:00,  3.60it/s][A
Epochs:  11%|█▏        | 5/44 [02:42<20:57, 32.25s/it]

Val Loss: 4.2841 - Val Accuracy: 0.9117



Training:   0%|          | 0/106 [00:00<?, ?it/s][A
Training:   1%|          | 1/106 [00:01<02:10,  1.24s/it][A
Training:   3%|▎         | 3/106 [00:01<00:38,  2.69it/s][A
Training:   5%|▍         | 5/106 [00:02<00:37,  2.69it/s][A
Training:   8%|▊         | 8/106 [00:02<00:19,  4.96it/s][A
Training:   9%|▉         | 10/106 [00:03<00:24,  3.87it/s][A
Training:  11%|█▏        | 12/106 [00:03<00:18,  4.95it/s][A
Training:  12%|█▏        | 13/106 [00:03<00:26,  3.49it/s][A
Training:  14%|█▍        | 15/106 [00:03<00:19,  4.77it/s][A
Training:  15%|█▌        | 16/106 [00:04<00:18,  4.99it/s][A
Training:  16%|█▌        | 17/106 [00:04<00:27,  3.26it/s][A
Training:  18%|█▊        | 19/106 [00:04<00:18,  4.69it/s][A
Training:  19%|█▉        | 20/106 [00:05<00:17,  4.99it/s][A
Training:  20%|█▉        | 21/106 [00:05<00:26,  3.18it/s][A
Training:  23%|██▎       | 24/106 [00:06<00:16,  5.12it/s][A
Training:  24%|██▎       | 25/106 [00:06<00:22,  3.61it/s][A
Training:  26%|██▋  

Epoch: 6/44 - Loss: 4.5749 - Accuracy: 0.9106



  0%|          | 0/14 [00:00<?, ?it/s][A
  7%|▋         | 1/14 [00:01<00:13,  1.04s/it][A
 36%|███▌      | 5/14 [00:01<00:02,  3.33it/s][A
 64%|██████▍   | 9/14 [00:02<00:01,  4.31it/s][A
100%|██████████| 14/14 [00:03<00:00,  4.55it/s][A
Epochs:  14%|█▎        | 6/44 [03:09<19:10, 30.29s/it]

Val Loss: 4.4493 - Val Accuracy: 0.9201



Training:   0%|          | 0/106 [00:00<?, ?it/s][A
Training:   1%|          | 1/106 [00:01<01:49,  1.04s/it][A
Training:   3%|▎         | 3/106 [00:01<00:31,  3.24it/s][A
Training:   5%|▍         | 5/106 [00:01<00:28,  3.60it/s][A
Training:   7%|▋         | 7/106 [00:01<00:18,  5.30it/s][A
Training:   8%|▊         | 9/106 [00:02<00:21,  4.47it/s][A
Training:  10%|█         | 11/106 [00:02<00:16,  5.85it/s][A
Training:  12%|█▏        | 13/106 [00:03<00:20,  4.54it/s][A
Training:  15%|█▌        | 16/106 [00:03<00:13,  6.85it/s][A
Training:  17%|█▋        | 18/106 [00:03<00:17,  5.09it/s][A
Training:  20%|█▉        | 21/106 [00:04<00:16,  5.14it/s][A
Training:  24%|██▎       | 25/106 [00:05<00:14,  5.50it/s][A
Training:  26%|██▋       | 28/106 [00:05<00:10,  7.28it/s][A
Training:  28%|██▊       | 30/106 [00:05<00:13,  5.80it/s][A
Training:  31%|███       | 33/106 [00:06<00:13,  5.48it/s][A
Training:  34%|███▍      | 36/106 [00:06<00:09,  7.35it/s][A
Training:  36%|███▌  

Epoch: 7/44 - Loss: 4.1851 - Accuracy: 0.9200



  0%|          | 0/14 [00:00<?, ?it/s][A
  7%|▋         | 1/14 [00:01<00:14,  1.12s/it][A
 21%|██▏       | 3/14 [00:01<00:03,  3.05it/s][A
 36%|███▌      | 5/14 [00:01<00:02,  3.13it/s][A
 64%|██████▍   | 9/14 [00:02<00:01,  4.30it/s][A
 79%|███████▊  | 11/14 [00:02<00:00,  5.43it/s][A
100%|██████████| 14/14 [00:03<00:00,  4.15it/s][A
Epochs:  16%|█▌        | 7/44 [03:31<17:03, 27.66s/it]

Val Loss: 4.2013 - Val Accuracy: 0.9206



Training:   0%|          | 0/106 [00:00<?, ?it/s][A
Training:   1%|          | 1/106 [00:01<01:54,  1.09s/it][A
Training:   3%|▎         | 3/106 [00:01<00:34,  3.00it/s][A
Training:   5%|▍         | 5/106 [00:01<00:31,  3.21it/s][A
Training:   7%|▋         | 7/106 [00:01<00:19,  4.98it/s][A
Training:   8%|▊         | 9/106 [00:02<00:23,  4.07it/s][A
Training:  12%|█▏        | 13/106 [00:03<00:19,  4.70it/s][A
Training:  14%|█▍        | 15/106 [00:03<00:15,  5.81it/s][A
Training:  16%|█▌        | 17/106 [00:04<00:19,  4.56it/s][A
Training:  19%|█▉        | 20/106 [00:04<00:13,  6.50it/s][A
Training:  21%|██        | 22/106 [00:04<00:15,  5.33it/s][A
Training:  23%|██▎       | 24/106 [00:04<00:12,  6.58it/s][A
Training:  25%|██▍       | 26/106 [00:05<00:15,  5.01it/s][A
Training:  25%|██▌       | 27/106 [00:05<00:14,  5.33it/s][A
Training:  27%|██▋       | 29/106 [00:06<00:18,  4.28it/s][A
Training:  29%|██▉       | 31/106 [00:06<00:13,  5.59it/s][A
Training:  31%|███   

Epoch: 8/44 - Loss: 4.1736 - Accuracy: 0.9180



  0%|          | 0/14 [00:00<?, ?it/s][A
  7%|▋         | 1/14 [00:01<00:13,  1.03s/it][A
 36%|███▌      | 5/14 [00:01<00:02,  3.35it/s][A
 64%|██████▍   | 9/14 [00:02<00:01,  4.55it/s][A
100%|██████████| 14/14 [00:02<00:00,  4.73it/s][A
Epochs:  18%|█▊        | 8/44 [03:54<15:36, 26.01s/it]

Val Loss: 4.2728 - Val Accuracy: 0.9106



Training:   0%|          | 0/106 [00:00<?, ?it/s][A
Training:   1%|          | 1/106 [00:00<01:29,  1.17it/s][A
Training:   5%|▍         | 5/106 [00:01<00:25,  4.00it/s][A
Training:   8%|▊         | 9/106 [00:02<00:18,  5.13it/s][A
Training:  12%|█▏        | 13/106 [00:02<00:15,  6.09it/s][A
Training:  16%|█▌        | 17/106 [00:03<00:13,  6.43it/s][A
Training:  17%|█▋        | 18/106 [00:03<00:13,  6.39it/s][A
Training:  20%|█▉        | 21/106 [00:03<00:14,  5.93it/s][A
Training:  24%|██▎       | 25/106 [00:04<00:13,  6.10it/s][A
Training:  25%|██▍       | 26/106 [00:04<00:13,  6.06it/s][A
Training:  27%|██▋       | 29/106 [00:05<00:12,  5.93it/s][A
Training:  29%|██▉       | 31/106 [00:05<00:10,  6.85it/s][A
Training:  31%|███       | 33/106 [00:05<00:12,  5.75it/s][A
Training:  32%|███▏      | 34/106 [00:05<00:11,  6.08it/s][A
Training:  35%|███▍      | 37/106 [00:06<00:11,  6.01it/s][A
Training:  36%|███▌      | 38/106 [00:06<00:11,  5.73it/s][A
Training:  39%|███▊

Epoch: 9/44 - Loss: 3.9200 - Accuracy: 0.9189



  0%|          | 0/14 [00:00<?, ?it/s][A
  7%|▋         | 1/14 [00:01<00:17,  1.34s/it][A
 36%|███▌      | 5/14 [00:01<00:03,  2.98it/s][A
 64%|██████▍   | 9/14 [00:02<00:01,  3.78it/s][A
 86%|████████▌ | 12/14 [00:02<00:00,  5.46it/s][A
100%|██████████| 14/14 [00:03<00:00,  3.91it/s][A
Epochs:  20%|██        | 9/44 [04:15<14:21, 24.61s/it]

Val Loss: 4.5779 - Val Accuracy: 0.9196



Training:   0%|          | 0/106 [00:00<?, ?it/s][A
Training:   1%|          | 1/106 [00:01<01:51,  1.06s/it][A
Training:   3%|▎         | 3/106 [00:01<00:34,  2.99it/s][A
Training:   5%|▍         | 5/106 [00:01<00:33,  3.04it/s][A
Training:   7%|▋         | 7/106 [00:02<00:22,  4.31it/s][A
Training:   8%|▊         | 9/106 [00:02<00:24,  4.02it/s][A
Training:  10%|█         | 11/106 [00:02<00:18,  5.15it/s][A
Training:  12%|█▏        | 13/106 [00:03<00:18,  4.99it/s][A
Training:  14%|█▍        | 15/106 [00:03<00:14,  6.26it/s][A
Training:  16%|█▌        | 17/106 [00:03<00:17,  5.16it/s][A
Training:  18%|█▊        | 19/106 [00:04<00:14,  6.14it/s][A
Training:  20%|█▉        | 21/106 [00:04<00:16,  5.28it/s][A
Training:  22%|██▏       | 23/106 [00:04<00:14,  5.72it/s][A
Training:  24%|██▎       | 25/106 [00:05<00:16,  5.04it/s][A
Training:  25%|██▌       | 27/106 [00:05<00:14,  5.53it/s][A
Training:  27%|██▋       | 29/106 [00:06<00:14,  5.39it/s][A
Training:  29%|██▉   

Epoch: 10/44 - Loss: 4.1144 - Accuracy: 0.9139



  0%|          | 0/14 [00:00<?, ?it/s][A
  7%|▋         | 1/14 [00:00<00:12,  1.02it/s][A
 36%|███▌      | 5/14 [00:01<00:02,  3.60it/s][A
 64%|██████▍   | 9/14 [00:02<00:01,  4.49it/s][A
100%|██████████| 14/14 [00:03<00:00,  4.64it/s][A
Epochs:  20%|██        | 9/44 [04:38<18:02, 30.94s/it]
[32m[I 2023-12-14 11:06:14,383][0m Trial 15 pruned. [0m


Val Loss: 4.8095 - Val Accuracy: 0.9077
Learning rate for Loss: 0.0013642692872476868
Learning rate: 0.002288803503943796
Weight decay: 0.00014500461925799588
Epsilon: 1.6797483122420435e-09
Batch size: 194
Number of epochs: 25


Epochs:   0%|          | 0/25 [00:00<?, ?it/s]
Training:   0%|          | 0/73 [00:00<?, ?it/s][A
Training:   1%|▏         | 1/73 [00:01<02:00,  1.68s/it][A
Training:   4%|▍         | 3/73 [00:01<00:33,  2.10it/s][A
Training:   7%|▋         | 5/73 [00:02<00:35,  1.90it/s][A
Training:  10%|▉         | 7/73 [00:03<00:22,  3.00it/s][A
Training:  12%|█▏        | 9/73 [00:03<00:23,  2.68it/s][A
Training:  16%|█▋        | 12/73 [00:04<00:13,  4.50it/s][A
Training:  19%|█▉        | 14/73 [00:04<00:17,  3.44it/s][A
Training:  23%|██▎       | 17/73 [00:05<00:16,  3.40it/s][A
Training:  26%|██▌       | 19/73 [00:05<00:12,  4.33it/s][A
Training:  29%|██▉       | 21/73 [00:06<00:15,  3.42it/s][A
Training:  33%|███▎      | 24/73 [00:06<00:09,  5.10it/s][A
Training:  36%|███▌      | 26/73 [00:07<00:12,  3.79it/s][A
Training:  40%|███▉      | 29/73 [00:08<00:12,  3.61it/s][A
Training:  42%|████▏     | 31/73 [00:08<00:09,  4.54it/s][A
Training:  45%|████▌     | 33/73 [00:09<00:11,  3.57

Epoch: 1/25 - Loss: 14.7272 - Accuracy: 0.8089



  0%|          | 0/9 [00:00<?, ?it/s][A
 11%|█         | 1/9 [00:01<00:10,  1.25s/it][A
 44%|████▍     | 4/9 [00:01<00:01,  3.74it/s][A
 67%|██████▋   | 6/9 [00:02<00:01,  2.91it/s][A
100%|██████████| 9/9 [00:03<00:00,  2.81it/s][A
Epochs:   0%|          | 0/25 [00:22<?, ?it/s]
[32m[I 2023-12-14 11:06:37,361][0m Trial 16 pruned. [0m


Val Loss: 8.3841 - Val Accuracy: 0.8711
Learning rate for Loss: 0.0033107389307156145
Learning rate: 0.0003838612275375301
Weight decay: 0.0033012134894320976
Epsilon: 3.39228871048199e-09
Batch size: 94
Number of epochs: 59


Epochs:   0%|          | 0/59 [00:00<?, ?it/s]
Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:00<02:00,  1.23it/s][A
Training:   3%|▎         | 5/149 [00:01<00:32,  4.41it/s][A
Training:   6%|▌         | 9/149 [00:01<00:22,  6.10it/s][A
Training:   7%|▋         | 11/149 [00:01<00:18,  7.47it/s][A
Training:   9%|▊         | 13/149 [00:02<00:21,  6.26it/s][A
Training:  10%|█         | 15/149 [00:02<00:17,  7.79it/s][A
Training:  11%|█▏        | 17/149 [00:02<00:19,  6.71it/s][A
Training:  13%|█▎        | 19/149 [00:03<00:17,  7.50it/s][A
Training:  14%|█▍        | 21/149 [00:03<00:18,  7.05it/s][A
Training:  15%|█▌        | 23/149 [00:03<00:15,  7.95it/s][A
Training:  17%|█▋        | 25/149 [00:03<00:17,  7.03it/s][A
Training:  18%|█▊        | 27/149 [00:04<00:14,  8.49it/s][A
Training:  19%|█▉        | 29/149 [00:04<00:14,  8.10it/s][A
Training:  20%|██        | 30/149 [00:04<00:15,  7.79it/s][A
Training:  22%|██▏       | 33/149 [

Epoch: 1/59 - Loss: 10.3757 - Accuracy: 0.7948



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:00<00:14,  1.28it/s][A
 26%|██▋       | 5/19 [00:01<00:03,  4.62it/s][A
 47%|████▋     | 9/19 [00:01<00:01,  6.39it/s][A
 63%|██████▎   | 12/19 [00:01<00:00,  8.99it/s][A
 74%|███████▎  | 14/19 [00:02<00:00,  7.50it/s][A
100%|██████████| 19/19 [00:02<00:00,  7.08it/s][A
Epochs:   2%|▏         | 1/59 [00:21<20:43, 21.44s/it]

Val Loss: 5.5011 - Val Accuracy: 0.9032



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:00<01:32,  1.59it/s][A
Training:   3%|▎         | 5/149 [00:01<00:25,  5.62it/s][A
Training:   6%|▌         | 9/149 [00:01<00:18,  7.53it/s][A
Training:   8%|▊         | 12/149 [00:01<00:13, 10.03it/s][A
Training:   9%|▉         | 14/149 [00:02<00:18,  7.29it/s][A
Training:  11%|█▏        | 17/149 [00:02<00:18,  7.21it/s][A
Training:  13%|█▎        | 19/149 [00:02<00:15,  8.57it/s][A
Training:  14%|█▍        | 21/149 [00:02<00:18,  7.05it/s][A
Training:  17%|█▋        | 25/149 [00:03<00:15,  8.09it/s][A
Training:  19%|█▉        | 28/149 [00:03<00:11, 10.17it/s][A
Training:  20%|██        | 30/149 [00:03<00:15,  7.71it/s][A
Training:  22%|██▏       | 33/149 [00:04<00:16,  7.25it/s][A
Training:  25%|██▍       | 37/149 [00:04<00:15,  7.37it/s][A
Training:  27%|██▋       | 40/149 [00:05<00:11,  9.29it/s][A
Training:  28%|██▊       | 42/149 [00:05<00:13,  7.85it/s][A
Training:  30%|███ 

Epoch: 2/59 - Loss: 5.4432 - Accuracy: 0.9065



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:00<00:11,  1.52it/s][A
 26%|██▋       | 5/19 [00:01<00:02,  4.99it/s][A
 47%|████▋     | 9/19 [00:01<00:01,  6.48it/s][A
 58%|█████▊    | 11/19 [00:01<00:01,  7.74it/s][A
 68%|██████▊   | 13/19 [00:02<00:00,  6.42it/s][A
100%|██████████| 19/19 [00:02<00:00,  7.01it/s][A
Epochs:   3%|▎         | 2/59 [00:41<19:39, 20.69s/it]

Val Loss: 4.7847 - Val Accuracy: 0.9208



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:00<01:57,  1.26it/s][A
Training:   3%|▎         | 5/149 [00:01<00:31,  4.52it/s][A
Training:   5%|▌         | 8/149 [00:01<00:18,  7.59it/s][A
Training:   7%|▋         | 10/149 [00:01<00:20,  6.70it/s][A
Training:   9%|▊         | 13/149 [00:02<00:20,  6.57it/s][A
Training:  11%|█▏        | 17/149 [00:02<00:17,  7.43it/s][A
Training:  14%|█▍        | 21/149 [00:03<00:15,  8.18it/s][A
Training:  17%|█▋        | 25/149 [00:03<00:14,  8.27it/s][A
Training:  19%|█▉        | 28/149 [00:03<00:12,  9.92it/s][A
Training:  20%|██        | 30/149 [00:04<00:15,  7.89it/s][A
Training:  22%|██▏       | 33/149 [00:04<00:15,  7.37it/s][A
Training:  23%|██▎       | 35/149 [00:04<00:13,  8.54it/s][A
Training:  25%|██▍       | 37/149 [00:05<00:15,  7.30it/s][A
Training:  28%|██▊       | 41/149 [00:05<00:13,  8.21it/s][A
Training:  30%|███       | 45/149 [00:06<00:12,  8.27it/s][A
Training:  33%|███▎

Epoch: 3/59 - Loss: 4.7552 - Accuracy: 0.9171



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:00<00:12,  1.41it/s][A
 21%|██        | 4/19 [00:00<00:02,  5.94it/s][A
 32%|███▏      | 6/19 [00:01<00:02,  5.76it/s][A
 42%|████▏     | 8/19 [00:01<00:01,  7.65it/s][A
 53%|█████▎    | 10/19 [00:01<00:01,  7.16it/s][A
 63%|██████▎   | 12/19 [00:01<00:00,  8.71it/s][A
 74%|███████▎  | 14/19 [00:02<00:00,  7.43it/s][A
 79%|███████▉  | 15/19 [00:02<00:00,  7.66it/s][A
 89%|████████▉ | 17/19 [00:02<00:00,  7.85it/s][A
100%|██████████| 19/19 [00:02<00:00,  7.12it/s][A
Epochs:   5%|▌         | 3/59 [01:02<19:19, 20.70s/it]

Val Loss: 4.4608 - Val Accuracy: 0.9234



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:00<02:02,  1.21it/s][A
Training:   3%|▎         | 5/149 [00:01<00:30,  4.73it/s][A
Training:   5%|▌         | 8/149 [00:01<00:18,  7.76it/s][A
Training:   7%|▋         | 10/149 [00:01<00:20,  6.66it/s][A
Training:   8%|▊         | 12/149 [00:01<00:17,  8.00it/s][A
Training:   9%|▉         | 14/149 [00:02<00:20,  6.68it/s][A
Training:  11%|█▏        | 17/149 [00:02<00:20,  6.51it/s][A
Training:  13%|█▎        | 20/149 [00:02<00:14,  9.00it/s][A
Training:  15%|█▍        | 22/149 [00:03<00:17,  7.19it/s][A
Training:  17%|█▋        | 25/149 [00:03<00:17,  7.05it/s][A
Training:  19%|█▉        | 29/149 [00:04<00:15,  7.88it/s][A
Training:  21%|██▏       | 32/149 [00:04<00:11,  9.91it/s][A
Training:  23%|██▎       | 34/149 [00:04<00:13,  8.39it/s][A
Training:  25%|██▍       | 37/149 [00:05<00:14,  7.62it/s][A
Training:  27%|██▋       | 40/149 [00:05<00:11,  9.75it/s][A
Training:  28%|██▊ 

Epoch: 4/59 - Loss: 4.5720 - Accuracy: 0.9214



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:00<00:14,  1.27it/s][A
 26%|██▋       | 5/19 [00:01<00:03,  4.66it/s][A
 42%|████▏     | 8/19 [00:01<00:01,  7.77it/s][A
 53%|█████▎    | 10/19 [00:01<00:01,  6.39it/s][A
 68%|██████▊   | 13/19 [00:02<00:00,  6.55it/s][A
100%|██████████| 19/19 [00:02<00:00,  6.72it/s][A
Epochs:   7%|▋         | 4/59 [01:23<19:10, 20.92s/it]

Val Loss: 4.3491 - Val Accuracy: 0.9213



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:00<02:24,  1.03it/s][A
Training:   3%|▎         | 4/149 [00:01<00:31,  4.62it/s][A
Training:   4%|▍         | 6/149 [00:01<00:27,  5.23it/s][A
Training:   5%|▌         | 8/149 [00:01<00:19,  7.28it/s][A
Training:   7%|▋         | 10/149 [00:01<00:22,  6.16it/s][A
Training:   8%|▊         | 12/149 [00:02<00:18,  7.51it/s][A
Training:   9%|▉         | 14/149 [00:02<00:19,  6.97it/s][A
Training:  10%|█         | 15/149 [00:02<00:20,  6.64it/s][A
Training:  11%|█▏        | 17/149 [00:02<00:18,  7.24it/s][A
Training:  13%|█▎        | 19/149 [00:03<00:17,  7.64it/s][A
Training:  14%|█▍        | 21/149 [00:03<00:17,  7.14it/s][A
Training:  15%|█▌        | 23/149 [00:03<00:15,  8.15it/s][A
Training:  17%|█▋        | 25/149 [00:03<00:15,  7.84it/s][A
Training:  18%|█▊        | 27/149 [00:03<00:13,  8.83it/s][A
Training:  19%|█▉        | 29/149 [00:04<00:14,  8.49it/s][A
Training:  21%|██   

Epoch: 5/59 - Loss: 4.3716 - Accuracy: 0.9235



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:00<00:17,  1.01it/s][A
 26%|██▋       | 5/19 [00:01<00:03,  3.97it/s][A
 47%|████▋     | 9/19 [00:01<00:01,  5.51it/s][A
 68%|██████▊   | 13/19 [00:02<00:00,  6.56it/s][A
100%|██████████| 19/19 [00:02<00:00,  6.46it/s][A
Epochs:   8%|▊         | 5/59 [01:44<18:54, 21.00s/it]

Val Loss: 4.9639 - Val Accuracy: 0.9134



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:00<02:08,  1.15it/s][A
Training:   3%|▎         | 5/149 [00:01<00:30,  4.79it/s][A
Training:   6%|▌         | 9/149 [00:01<00:21,  6.55it/s][A
Training:   8%|▊         | 12/149 [00:01<00:15,  9.08it/s][A
Training:   9%|▉         | 14/149 [00:02<00:16,  8.27it/s][A
Training:  11%|█▏        | 17/149 [00:02<00:16,  8.23it/s][A
Training:  14%|█▍        | 21/149 [00:02<00:14,  8.84it/s][A
Training:  15%|█▌        | 23/149 [00:02<00:12,  9.90it/s][A
Training:  17%|█▋        | 25/149 [00:03<00:14,  8.51it/s][A
Training:  18%|█▊        | 27/149 [00:03<00:12,  9.74it/s][A
Training:  19%|█▉        | 29/149 [00:03<00:14,  8.06it/s][A
Training:  21%|██▏       | 32/149 [00:03<00:10, 10.74it/s][A
Training:  23%|██▎       | 34/149 [00:04<00:13,  8.25it/s][A
Training:  25%|██▍       | 37/149 [00:04<00:13,  8.51it/s][A
Training:  26%|██▌       | 39/149 [00:04<00:11,  9.83it/s][A
Training:  28%|██▊ 

Epoch: 6/59 - Loss: 4.2280 - Accuracy: 0.9254



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:00<00:16,  1.12it/s][A
 26%|██▋       | 5/19 [00:01<00:03,  4.66it/s][A
 47%|████▋     | 9/19 [00:01<00:01,  6.12it/s][A
 68%|██████▊   | 13/19 [00:02<00:00,  6.87it/s][A
100%|██████████| 19/19 [00:02<00:00,  6.89it/s][A
Epochs:  10%|█         | 6/59 [02:04<18:18, 20.72s/it]

Val Loss: 4.0878 - Val Accuracy: 0.9270



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:00<02:04,  1.19it/s][A
Training:   3%|▎         | 4/149 [00:00<00:27,  5.31it/s][A
Training:   4%|▍         | 6/149 [00:01<00:26,  5.50it/s][A
Training:   6%|▌         | 9/149 [00:01<00:22,  6.10it/s][A
Training:   9%|▊         | 13/149 [00:02<00:20,  6.77it/s][A
Training:  11%|█         | 16/149 [00:02<00:14,  9.00it/s][A
Training:  12%|█▏        | 18/149 [00:02<00:17,  7.55it/s][A
Training:  14%|█▍        | 21/149 [00:03<00:16,  7.99it/s][A
Training:  17%|█▋        | 25/149 [00:03<00:13,  9.00it/s][A
Training:  19%|█▉        | 28/149 [00:03<00:10, 11.05it/s][A
Training:  20%|██        | 30/149 [00:03<00:13,  8.99it/s][A
Training:  22%|██▏       | 33/149 [00:04<00:13,  8.38it/s][A
Training:  23%|██▎       | 35/149 [00:04<00:12,  9.29it/s][A
Training:  25%|██▍       | 37/149 [00:04<00:14,  7.87it/s][A
Training:  26%|██▌       | 38/149 [00:05<00:14,  7.83it/s][A
Training:  28%|██▊  

Epoch: 7/59 - Loss: 4.1127 - Accuracy: 0.9286



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:00<00:12,  1.43it/s][A
 11%|█         | 2/19 [00:00<00:06,  2.83it/s][A
 26%|██▋       | 5/19 [00:01<00:02,  5.67it/s][A
 32%|███▏      | 6/19 [00:01<00:02,  6.02it/s][A
 47%|████▋     | 9/19 [00:01<00:01,  7.41it/s][A
 53%|█████▎    | 10/19 [00:01<00:01,  6.67it/s][A
 68%|██████▊   | 13/19 [00:02<00:00,  8.00it/s][A
 74%|███████▎  | 14/19 [00:02<00:00,  7.36it/s][A
 89%|████████▉ | 17/19 [00:02<00:00,  9.28it/s][A
100%|██████████| 19/19 [00:02<00:00,  6.89it/s][A
Epochs:  12%|█▏        | 7/59 [02:25<17:54, 20.66s/it]

Val Loss: 4.0224 - Val Accuracy: 0.9295



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:00<02:04,  1.19it/s][A
Training:   3%|▎         | 5/149 [00:01<00:33,  4.33it/s][A
Training:   6%|▌         | 9/149 [00:01<00:23,  6.02it/s][A
Training:   8%|▊         | 12/149 [00:01<00:16,  8.44it/s][A
Training:   9%|▉         | 14/149 [00:02<00:18,  7.36it/s][A
Training:  11%|█▏        | 17/149 [00:02<00:17,  7.56it/s][A
Training:  14%|█▍        | 21/149 [00:03<00:16,  7.97it/s][A
Training:  16%|█▌        | 24/149 [00:03<00:12,  9.99it/s][A
Training:  17%|█▋        | 26/149 [00:03<00:15,  8.14it/s][A
Training:  19%|█▉        | 29/149 [00:04<00:15,  7.61it/s][A
Training:  22%|██▏       | 33/149 [00:04<00:15,  7.52it/s][A
Training:  24%|██▍       | 36/149 [00:04<00:11,  9.56it/s][A
Training:  26%|██▌       | 38/149 [00:05<00:13,  8.52it/s][A
Training:  28%|██▊       | 41/149 [00:05<00:14,  7.71it/s][A
Training:  29%|██▉       | 43/149 [00:05<00:12,  8.79it/s][A
Training:  30%|███ 

Epoch: 8/59 - Loss: 4.0547 - Accuracy: 0.9289



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:00<00:12,  1.49it/s][A
 21%|██        | 4/19 [00:00<00:02,  6.41it/s][A
 32%|███▏      | 6/19 [00:01<00:02,  5.39it/s][A
 47%|████▋     | 9/19 [00:01<00:01,  5.90it/s][A
 68%|██████▊   | 13/19 [00:02<00:00,  7.13it/s][A
100%|██████████| 19/19 [00:02<00:00,  7.11it/s][A
Epochs:  14%|█▎        | 8/59 [02:46<17:34, 20.68s/it]

Val Loss: 4.0433 - Val Accuracy: 0.9292



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:00<01:44,  1.41it/s][A
Training:   3%|▎         | 4/149 [00:00<00:23,  6.15it/s][A
Training:   4%|▍         | 6/149 [00:01<00:24,  5.75it/s][A
Training:   5%|▌         | 8/149 [00:01<00:18,  7.60it/s][A
Training:   7%|▋         | 10/149 [00:01<00:21,  6.53it/s][A
Training:   9%|▊         | 13/149 [00:02<00:19,  6.98it/s][A
Training:  11%|█         | 16/149 [00:02<00:14,  9.42it/s][A
Training:  12%|█▏        | 18/149 [00:02<00:18,  7.13it/s][A
Training:  14%|█▍        | 21/149 [00:03<00:17,  7.43it/s][A
Training:  15%|█▌        | 23/149 [00:03<00:14,  8.77it/s][A
Training:  17%|█▋        | 25/149 [00:03<00:14,  8.37it/s][A
Training:  19%|█▉        | 29/149 [00:03<00:13,  8.61it/s][A
Training:  22%|██▏       | 33/149 [00:04<00:12,  8.93it/s][A
Training:  25%|██▍       | 37/149 [00:04<00:13,  8.50it/s][A
Training:  28%|██▊       | 41/149 [00:05<00:12,  8.40it/s][A
Training:  30%|██▉  

Epoch: 9/59 - Loss: 4.1130 - Accuracy: 0.9283



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:00<00:13,  1.29it/s][A
 16%|█▌        | 3/19 [00:00<00:03,  4.08it/s][A
 26%|██▋       | 5/19 [00:01<00:03,  4.66it/s][A
 32%|███▏      | 6/19 [00:01<00:02,  5.17it/s][A
 47%|████▋     | 9/19 [00:01<00:01,  6.51it/s][A
 53%|█████▎    | 10/19 [00:01<00:01,  6.12it/s][A
 68%|██████▊   | 13/19 [00:02<00:00,  7.87it/s][A
 74%|███████▎  | 14/19 [00:02<00:00,  8.02it/s][A
 89%|████████▉ | 17/19 [00:02<00:00,  8.52it/s][A
100%|██████████| 19/19 [00:02<00:00,  6.65it/s][A
Epochs:  15%|█▌        | 9/59 [03:06<17:11, 20.64s/it]

Val Loss: 4.2744 - Val Accuracy: 0.9201



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:00<01:56,  1.27it/s][A
Training:   3%|▎         | 5/149 [00:01<00:30,  4.73it/s][A
Training:   5%|▌         | 8/149 [00:01<00:18,  7.81it/s][A
Training:   7%|▋         | 10/149 [00:01<00:20,  6.64it/s][A
Training:   9%|▊         | 13/149 [00:02<00:18,  7.21it/s][A
Training:  11%|█         | 16/149 [00:02<00:13,  9.85it/s][A
Training:  12%|█▏        | 18/149 [00:02<00:16,  7.74it/s][A
Training:  14%|█▍        | 21/149 [00:02<00:14,  8.76it/s][A
Training:  15%|█▌        | 23/149 [00:03<00:12,  9.74it/s][A
Training:  17%|█▋        | 25/149 [00:03<00:12, 10.03it/s][A
Training:  18%|█▊        | 27/149 [00:03<00:12,  9.88it/s][A
Training:  19%|█▉        | 29/149 [00:03<00:13,  8.65it/s][A
Training:  20%|██        | 30/149 [00:03<00:14,  8.48it/s][A
Training:  22%|██▏       | 33/149 [00:04<00:11,  9.75it/s][A
Training:  23%|██▎       | 35/149 [00:04<00:12,  8.99it/s][A
Training:  25%|██▍ 

Epoch: 10/59 - Loss: 3.8983 - Accuracy: 0.9299



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:00<00:13,  1.29it/s][A
 21%|██        | 4/19 [00:00<00:02,  5.65it/s][A
 32%|███▏      | 6/19 [00:01<00:02,  5.13it/s][A
 47%|████▋     | 9/19 [00:01<00:01,  5.85it/s][A
 68%|██████▊   | 13/19 [00:02<00:00,  7.30it/s][A
 84%|████████▍ | 16/19 [00:02<00:00,  9.80it/s][A
100%|██████████| 19/19 [00:02<00:00,  6.95it/s][A
Epochs:  17%|█▋        | 10/59 [03:26<16:42, 20.46s/it]

Val Loss: 4.0664 - Val Accuracy: 0.9252



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:00<02:05,  1.18it/s][A
Training:   3%|▎         | 4/149 [00:00<00:28,  5.11it/s][A
Training:   4%|▍         | 6/149 [00:01<00:26,  5.32it/s][A
Training:   6%|▌         | 9/149 [00:01<00:24,  5.80it/s][A
Training:   9%|▊         | 13/149 [00:02<00:20,  6.74it/s][A
Training:  11%|█         | 16/149 [00:02<00:15,  8.75it/s][A
Training:  12%|█▏        | 18/149 [00:02<00:17,  7.42it/s][A
Training:  14%|█▍        | 21/149 [00:03<00:19,  6.42it/s][A
Training:  15%|█▌        | 23/149 [00:03<00:16,  7.52it/s][A
Training:  17%|█▋        | 25/149 [00:04<00:19,  6.32it/s][A
Training:  18%|█▊        | 27/149 [00:04<00:17,  7.08it/s][A
Training:  19%|█▉        | 29/149 [00:04<00:19,  6.05it/s][A
Training:  21%|██        | 31/149 [00:04<00:17,  6.87it/s][A
Training:  22%|██▏       | 33/149 [00:05<00:17,  6.68it/s][A
Training:  23%|██▎       | 35/149 [00:05<00:18,  6.23it/s][A
Training:  25%|██▍  

Epoch: 11/59 - Loss: 3.8897 - Accuracy: 0.9307



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:01<00:22,  1.25s/it][A
 16%|█▌        | 3/19 [00:01<00:05,  2.76it/s][A
 26%|██▋       | 5/19 [00:01<00:04,  3.47it/s][A
 32%|███▏      | 6/19 [00:01<00:03,  3.91it/s][A
 37%|███▋      | 7/19 [00:02<00:02,  4.62it/s][A
 47%|████▋     | 9/19 [00:02<00:02,  4.39it/s][A
 53%|█████▎    | 10/19 [00:02<00:01,  4.87it/s][A
 58%|█████▊    | 11/19 [00:02<00:01,  5.25it/s][A
 68%|██████▊   | 13/19 [00:03<00:01,  4.42it/s][A
 79%|███████▉  | 15/19 [00:03<00:00,  5.76it/s][A
 89%|████████▉ | 17/19 [00:04<00:00,  4.97it/s][A
100%|██████████| 19/19 [00:04<00:00,  4.32it/s][A
Epochs:  19%|█▊        | 11/59 [03:54<18:08, 22.69s/it]

Val Loss: 4.0037 - Val Accuracy: 0.9252



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:01<03:14,  1.32s/it][A
Training:   2%|▏         | 3/149 [00:01<00:55,  2.64it/s][A
Training:   3%|▎         | 5/149 [00:02<00:54,  2.64it/s][A
Training:   5%|▍         | 7/149 [00:02<00:34,  4.09it/s][A
Training:   6%|▌         | 9/149 [00:02<00:38,  3.66it/s][A
Training:   9%|▊         | 13/149 [00:03<00:28,  4.72it/s][A
Training:  11%|█▏        | 17/149 [00:04<00:26,  4.93it/s][A
Training:  13%|█▎        | 19/149 [00:04<00:22,  5.75it/s][A
Training:  14%|█▍        | 21/149 [00:05<00:28,  4.52it/s][A
Training:  15%|█▌        | 23/149 [00:05<00:22,  5.56it/s][A
Training:  17%|█▋        | 25/149 [00:06<00:27,  4.46it/s][A
Training:  18%|█▊        | 27/149 [00:06<00:21,  5.55it/s][A
Training:  19%|█▉        | 29/149 [00:06<00:25,  4.75it/s][A
Training:  21%|██        | 31/149 [00:06<00:20,  5.86it/s][A
Training:  22%|██▏       | 33/149 [00:07<00:21,  5.38it/s][A
Training:  24%|██▍   

Epoch: 12/59 - Loss: 3.9637 - Accuracy: 0.9296



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:01<00:21,  1.22s/it][A
 11%|█         | 2/19 [00:01<00:10,  1.55it/s][A
 21%|██        | 4/19 [00:01<00:04,  3.66it/s][A
 32%|███▏      | 6/19 [00:02<00:03,  3.46it/s][A
 47%|████▋     | 9/19 [00:02<00:02,  4.55it/s][A
 53%|█████▎    | 10/19 [00:03<00:02,  3.93it/s][A
 68%|██████▊   | 13/19 [00:03<00:01,  5.34it/s][A
 74%|███████▎  | 14/19 [00:03<00:01,  4.53it/s][A
 89%|████████▉ | 17/19 [00:03<00:00,  6.22it/s][A
100%|██████████| 19/19 [00:04<00:00,  4.31it/s][A
Epochs:  20%|██        | 12/59 [04:26<20:03, 25.60s/it]

Val Loss: 5.1095 - Val Accuracy: 0.9057



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:01<03:21,  1.36s/it][A
Training:   1%|▏         | 2/149 [00:01<01:31,  1.61it/s][A
Training:   2%|▏         | 3/149 [00:01<01:07,  2.18it/s][A
Training:   3%|▎         | 5/149 [00:02<00:48,  3.00it/s][A
Training:   5%|▍         | 7/149 [00:02<00:34,  4.06it/s][A
Training:   6%|▌         | 9/149 [00:03<00:36,  3.82it/s][A
Training:   7%|▋         | 11/149 [00:03<00:28,  4.80it/s][A
Training:   9%|▊         | 13/149 [00:03<00:32,  4.20it/s][A
Training:  10%|█         | 15/149 [00:04<00:24,  5.39it/s][A
Training:  11%|█▏        | 17/149 [00:04<00:26,  5.01it/s][A
Training:  13%|█▎        | 19/149 [00:04<00:23,  5.63it/s][A
Training:  14%|█▍        | 21/149 [00:05<00:22,  5.67it/s][A
Training:  15%|█▌        | 23/149 [00:05<00:20,  6.03it/s][A
Training:  17%|█▋        | 25/149 [00:05<00:21,  5.82it/s][A
Training:  17%|█▋        | 26/149 [00:05<00:19,  6.22it/s][A
Training:  18%|█▊     

Epoch: 13/59 - Loss: 3.7961 - Accuracy: 0.9305



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:01<00:23,  1.32s/it][A
 16%|█▌        | 3/19 [00:01<00:06,  2.59it/s][A
 26%|██▋       | 5/19 [00:02<00:04,  2.94it/s][A
 47%|████▋     | 9/19 [00:02<00:02,  3.87it/s][A
 68%|██████▊   | 13/19 [00:03<00:01,  4.31it/s][A
100%|██████████| 19/19 [00:04<00:00,  4.13it/s][A
Epochs:  22%|██▏       | 13/59 [04:59<21:20, 27.84s/it]

Val Loss: 3.9644 - Val Accuracy: 0.9228



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:01<03:04,  1.25s/it][A
Training:   3%|▎         | 4/149 [00:01<00:41,  3.53it/s][A
Training:   4%|▍         | 6/149 [00:02<00:41,  3.46it/s][A
Training:   6%|▌         | 9/149 [00:02<00:35,  3.96it/s][A
Training:   8%|▊         | 12/149 [00:02<00:22,  6.00it/s][A
Training:   9%|▉         | 14/149 [00:03<00:28,  4.67it/s][A
Training:  11%|█▏        | 17/149 [00:04<00:30,  4.33it/s][A
Training:  13%|█▎        | 20/149 [00:04<00:21,  6.01it/s][A
Training:  15%|█▍        | 22/149 [00:04<00:22,  5.75it/s][A
Training:  16%|█▌        | 24/149 [00:04<00:17,  7.00it/s][A
Training:  17%|█▋        | 26/149 [00:05<00:23,  5.19it/s][A
Training:  19%|█▉        | 29/149 [00:06<00:23,  5.22it/s][A
Training:  21%|██        | 31/149 [00:06<00:18,  6.43it/s][A
Training:  22%|██▏       | 33/149 [00:06<00:21,  5.42it/s][A
Training:  23%|██▎       | 35/149 [00:06<00:17,  6.65it/s][A
Training:  25%|██▍  

Epoch: 14/59 - Loss: 3.6160 - Accuracy: 0.9348



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:01<00:19,  1.08s/it][A
 11%|█         | 2/19 [00:01<00:09,  1.87it/s][A
 21%|██        | 4/19 [00:01<00:04,  3.47it/s][A
 26%|██▋       | 5/19 [00:01<00:04,  3.44it/s][A
 32%|███▏      | 6/19 [00:01<00:03,  3.89it/s][A
 42%|████▏     | 8/19 [00:02<00:02,  5.26it/s][A
 47%|████▋     | 9/19 [00:02<00:02,  4.84it/s][A
 53%|█████▎    | 10/19 [00:02<00:02,  4.48it/s][A
 63%|██████▎   | 12/19 [00:03<00:01,  5.35it/s][A
 74%|███████▎  | 14/19 [00:03<00:01,  4.83it/s][A
 84%|████████▍ | 16/19 [00:03<00:00,  5.05it/s][A
100%|██████████| 19/19 [00:04<00:00,  4.53it/s][A
Epochs:  24%|██▎       | 14/59 [05:31<21:42, 28.94s/it]

Val Loss: 4.2664 - Val Accuracy: 0.9211



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:01<02:55,  1.19s/it][A
Training:   3%|▎         | 4/149 [00:01<00:38,  3.76it/s][A
Training:   4%|▍         | 6/149 [00:01<00:36,  3.90it/s][A
Training:   6%|▌         | 9/149 [00:02<00:29,  4.74it/s][A
Training:   7%|▋         | 11/149 [00:02<00:23,  5.80it/s][A
Training:   9%|▊         | 13/149 [00:03<00:26,  5.07it/s][A
Training:   9%|▉         | 14/149 [00:03<00:24,  5.44it/s][A
Training:  11%|█         | 16/149 [00:03<00:20,  6.56it/s][A
Training:  11%|█▏        | 17/149 [00:03<00:27,  4.87it/s][A
Training:  12%|█▏        | 18/149 [00:03<00:26,  4.87it/s][A
Training:  14%|█▍        | 21/149 [00:04<00:25,  4.97it/s][A
Training:  15%|█▍        | 22/149 [00:04<00:25,  4.98it/s][A
Training:  16%|█▌        | 24/149 [00:04<00:19,  6.29it/s][A
Training:  17%|█▋        | 25/149 [00:05<00:26,  4.65it/s][A
Training:  17%|█▋        | 26/149 [00:05<00:25,  4.79it/s][A
Training:  19%|█▉   

Epoch: 15/59 - Loss: 3.6840 - Accuracy: 0.9344



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:01<00:21,  1.22s/it][A
 16%|█▌        | 3/19 [00:01<00:05,  2.71it/s][A
 26%|██▋       | 5/19 [00:02<00:04,  2.89it/s][A
 47%|████▋     | 9/19 [00:02<00:02,  3.70it/s][A
 58%|█████▊    | 11/19 [00:03<00:01,  4.82it/s][A
 68%|██████▊   | 13/19 [00:03<00:01,  3.63it/s][A
 84%|████████▍ | 16/19 [00:04<00:00,  5.47it/s][A
100%|██████████| 19/19 [00:04<00:00,  4.03it/s][A
Epochs:  25%|██▌       | 15/59 [06:04<22:12, 30.29s/it]

Val Loss: 3.8713 - Val Accuracy: 0.9295



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:01<02:33,  1.04s/it][A
Training:   1%|▏         | 2/149 [00:01<01:18,  1.87it/s][A
Training:   3%|▎         | 5/149 [00:01<00:36,  3.99it/s][A
Training:   4%|▍         | 6/149 [00:02<00:42,  3.33it/s][A
Training:   6%|▌         | 9/149 [00:02<00:27,  5.18it/s][A
Training:   7%|▋         | 10/149 [00:02<00:34,  4.06it/s][A
Training:   8%|▊         | 12/149 [00:02<00:24,  5.63it/s][A
Training:   9%|▊         | 13/149 [00:03<00:23,  5.86it/s][A
Training:   9%|▉         | 14/149 [00:03<00:36,  3.68it/s][A
Training:  11%|█         | 16/149 [00:03<00:25,  5.32it/s][A
Training:  12%|█▏        | 18/149 [00:04<00:29,  4.46it/s][A
Training:  13%|█▎        | 20/149 [00:04<00:21,  5.98it/s][A
Training:  15%|█▍        | 22/149 [00:05<00:27,  4.62it/s][A
Training:  17%|█▋        | 25/149 [00:05<00:18,  6.69it/s][A
Training:  18%|█▊        | 27/149 [00:05<00:22,  5.43it/s][A
Training:  19%|█▉    

Epoch: 16/59 - Loss: 3.6652 - Accuracy: 0.9339



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:01<00:23,  1.29s/it][A
 21%|██        | 4/19 [00:01<00:04,  3.52it/s][A
 32%|███▏      | 6/19 [00:02<00:03,  3.43it/s][A
 47%|████▋     | 9/19 [00:02<00:02,  3.74it/s][A
 63%|██████▎   | 12/19 [00:02<00:01,  5.72it/s][A
 74%|███████▎  | 14/19 [00:03<00:01,  4.46it/s][A
100%|██████████| 19/19 [00:04<00:00,  4.22it/s][A
Epochs:  27%|██▋       | 16/59 [06:36<22:06, 30.85s/it]

Val Loss: 3.9471 - Val Accuracy: 0.9308



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:01<03:18,  1.34s/it][A
Training:   1%|▏         | 2/149 [00:01<01:31,  1.61it/s][A
Training:   3%|▎         | 5/149 [00:01<00:42,  3.38it/s][A
Training:   4%|▍         | 6/149 [00:02<00:36,  3.90it/s][A
Training:   6%|▌         | 9/149 [00:02<00:30,  4.61it/s][A
Training:   7%|▋         | 10/149 [00:02<00:28,  4.89it/s][A
Training:   8%|▊         | 12/149 [00:02<00:20,  6.59it/s][A
Training:   9%|▊         | 13/149 [00:03<00:25,  5.25it/s][A
Training:   9%|▉         | 14/149 [00:03<00:29,  4.63it/s][A
Training:  11%|█         | 16/149 [00:03<00:20,  6.55it/s][A
Training:  11%|█▏        | 17/149 [00:04<00:26,  4.94it/s][A
Training:  12%|█▏        | 18/149 [00:04<00:33,  3.92it/s][A
Training:  13%|█▎        | 20/149 [00:04<00:22,  5.67it/s][A
Training:  14%|█▍        | 21/149 [00:04<00:21,  5.89it/s][A
Training:  15%|█▍        | 22/149 [00:05<00:28,  4.41it/s][A
Training:  17%|█▋    

Epoch: 17/59 - Loss: 3.6588 - Accuracy: 0.9322



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:01<00:19,  1.10s/it][A
 16%|█▌        | 3/19 [00:01<00:05,  3.01it/s][A
 26%|██▋       | 5/19 [00:01<00:04,  3.10it/s][A
 42%|████▏     | 8/19 [00:02<00:01,  5.77it/s][A
 53%|█████▎    | 10/19 [00:02<00:02,  4.19it/s][A
 63%|██████▎   | 12/19 [00:02<00:01,  5.40it/s][A
 74%|███████▎  | 14/19 [00:03<00:01,  4.43it/s][A
100%|██████████| 19/19 [00:04<00:00,  4.41it/s][A
Epochs:  29%|██▉       | 17/59 [07:09<22:00, 31.43s/it]

Val Loss: 4.9531 - Val Accuracy: 0.9105



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:01<02:52,  1.16s/it][A
Training:   1%|▏         | 2/149 [00:01<01:19,  1.84it/s][A
Training:   3%|▎         | 4/149 [00:01<00:34,  4.25it/s][A
Training:   4%|▍         | 6/149 [00:01<00:34,  4.17it/s][A
Training:   5%|▌         | 8/149 [00:02<00:23,  5.90it/s][A
Training:   7%|▋         | 10/149 [00:02<00:32,  4.34it/s][A
Training:   9%|▊         | 13/149 [00:03<00:28,  4.85it/s][A
Training:  10%|█         | 15/149 [00:03<00:21,  6.21it/s][A
Training:  11%|█▏        | 17/149 [00:03<00:27,  4.88it/s][A
Training:  14%|█▍        | 21/149 [00:04<00:21,  5.96it/s][A
Training:  15%|█▌        | 23/149 [00:04<00:18,  6.88it/s][A
Training:  17%|█▋        | 25/149 [00:05<00:22,  5.50it/s][A
Training:  18%|█▊        | 27/149 [00:05<00:18,  6.59it/s][A
Training:  19%|█▉        | 29/149 [00:05<00:19,  6.03it/s][A
Training:  21%|██        | 31/149 [00:05<00:16,  7.00it/s][A
Training:  22%|██▏   

Epoch: 18/59 - Loss: 3.5573 - Accuracy: 0.9352



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:01<00:21,  1.22s/it][A
 21%|██        | 4/19 [00:01<00:04,  3.73it/s][A
 32%|███▏      | 6/19 [00:02<00:03,  3.45it/s][A
 42%|████▏     | 8/19 [00:02<00:02,  5.05it/s][A
 53%|█████▎    | 10/19 [00:02<00:02,  4.38it/s][A
 63%|██████▎   | 12/19 [00:02<00:01,  5.20it/s][A
 68%|██████▊   | 13/19 [00:03<00:01,  3.76it/s][A
100%|██████████| 19/19 [00:04<00:00,  4.52it/s][A
Epochs:  31%|███       | 18/59 [07:40<21:18, 31.19s/it]

Val Loss: 3.9662 - Val Accuracy: 0.9246



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:01<03:18,  1.34s/it][A
Training:   2%|▏         | 3/149 [00:01<00:56,  2.58it/s][A
Training:   3%|▎         | 5/149 [00:02<00:48,  2.95it/s][A
Training:   5%|▍         | 7/149 [00:02<00:31,  4.54it/s][A
Training:   6%|▌         | 9/149 [00:02<00:34,  4.02it/s][A
Training:   7%|▋         | 11/149 [00:02<00:26,  5.29it/s][A
Training:   9%|▊         | 13/149 [00:03<00:27,  4.99it/s][A
Training:   9%|▉         | 14/149 [00:03<00:26,  5.09it/s][A
Training:  11%|█         | 16/149 [00:03<00:20,  6.55it/s][A
Training:  11%|█▏        | 17/149 [00:04<00:29,  4.53it/s][A
Training:  13%|█▎        | 19/149 [00:04<00:24,  5.41it/s][A
Training:  14%|█▍        | 21/149 [00:04<00:22,  5.81it/s][A
Training:  15%|█▍        | 22/149 [00:04<00:20,  6.11it/s][A
Training:  15%|█▌        | 23/149 [00:05<00:23,  5.28it/s][A
Training:  16%|█▌        | 24/149 [00:05<00:21,  5.82it/s][A
Training:  17%|█▋    

Epoch: 19/59 - Loss: 3.4714 - Accuracy: 0.9358



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:01<00:20,  1.15s/it][A
 11%|█         | 2/19 [00:01<00:09,  1.86it/s][A
 26%|██▋       | 5/19 [00:01<00:04,  3.34it/s][A
 37%|███▋      | 7/19 [00:02<00:02,  4.97it/s][A
 47%|████▋     | 9/19 [00:02<00:02,  4.08it/s][A
 58%|█████▊    | 11/19 [00:02<00:01,  5.26it/s][A
 68%|██████▊   | 13/19 [00:03<00:01,  4.35it/s][A
 84%|████████▍ | 16/19 [00:03<00:00,  6.66it/s][A
100%|██████████| 19/19 [00:04<00:00,  4.33it/s][A
Epochs:  32%|███▏      | 19/59 [08:13<21:07, 31.68s/it]

Val Loss: 4.0964 - Val Accuracy: 0.9263



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:01<03:12,  1.30s/it][A
Training:   1%|▏         | 2/149 [00:01<01:28,  1.66it/s][A
Training:   3%|▎         | 4/149 [00:01<00:41,  3.51it/s][A
Training:   3%|▎         | 5/149 [00:02<00:47,  3.01it/s][A
Training:   5%|▍         | 7/149 [00:02<00:29,  4.79it/s][A
Training:   6%|▌         | 9/149 [00:02<00:35,  3.98it/s][A
Training:   7%|▋         | 11/149 [00:02<00:25,  5.33it/s][A
Training:   9%|▊         | 13/149 [00:03<00:31,  4.35it/s][A
Training:   9%|▉         | 14/149 [00:03<00:27,  4.87it/s][A
Training:  11%|█▏        | 17/149 [00:04<00:28,  4.57it/s][A
Training:  13%|█▎        | 19/149 [00:04<00:22,  5.85it/s][A
Training:  14%|█▍        | 21/149 [00:05<00:27,  4.64it/s][A
Training:  17%|█▋        | 25/149 [00:05<00:23,  5.29it/s][A
Training:  18%|█▊        | 27/149 [00:05<00:19,  6.41it/s][A
Training:  19%|█▉        | 29/149 [00:06<00:23,  5.05it/s][A
Training:  21%|██     

Epoch: 20/59 - Loss: 3.4790 - Accuracy: 0.9364



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:01<00:27,  1.53s/it][A
 26%|██▋       | 5/19 [00:02<00:04,  2.84it/s][A
 47%|████▋     | 9/19 [00:02<00:02,  4.42it/s][A
 53%|█████▎    | 10/19 [00:02<00:01,  4.56it/s][A
 68%|██████▊   | 13/19 [00:03<00:01,  4.36it/s][A
 79%|███████▉  | 15/19 [00:03<00:00,  5.58it/s][A
100%|██████████| 19/19 [00:04<00:00,  4.29it/s][A
Epochs:  34%|███▍      | 20/59 [08:46<20:51, 32.10s/it]

Val Loss: 4.0401 - Val Accuracy: 0.9323



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:01<02:59,  1.21s/it][A
Training:   2%|▏         | 3/149 [00:01<00:51,  2.81it/s][A
Training:   3%|▎         | 5/149 [00:01<00:44,  3.23it/s][A
Training:   5%|▍         | 7/149 [00:02<00:29,  4.81it/s][A
Training:   6%|▌         | 9/149 [00:02<00:30,  4.63it/s][A
Training:   7%|▋         | 10/149 [00:02<00:33,  4.20it/s][A
Training:   8%|▊         | 12/149 [00:02<00:23,  5.74it/s][A
Training:   9%|▊         | 13/149 [00:03<00:25,  5.39it/s][A
Training:   9%|▉         | 14/149 [00:03<00:33,  4.00it/s][A
Training:  11%|█         | 16/149 [00:03<00:23,  5.58it/s][A
Training:  11%|█▏        | 17/149 [00:03<00:24,  5.37it/s][A
Training:  12%|█▏        | 18/149 [00:04<00:31,  4.13it/s][A
Training:  13%|█▎        | 20/149 [00:04<00:21,  5.88it/s][A
Training:  14%|█▍        | 21/149 [00:04<00:21,  5.95it/s][A
Training:  15%|█▍        | 22/149 [00:04<00:25,  5.05it/s][A
Training:  16%|█▌    

Epoch: 21/59 - Loss: 3.5590 - Accuracy: 0.9337



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:00<00:16,  1.11it/s][A
 11%|█         | 2/19 [00:01<00:07,  2.25it/s][A
 21%|██        | 4/19 [00:01<00:03,  4.12it/s][A
 26%|██▋       | 5/19 [00:01<00:04,  3.21it/s][A
 32%|███▏      | 6/19 [00:01<00:03,  3.75it/s][A
 47%|████▋     | 9/19 [00:02<00:02,  4.51it/s][A
 58%|█████▊    | 11/19 [00:02<00:01,  6.08it/s][A
 68%|██████▊   | 13/19 [00:03<00:01,  5.08it/s][A
 74%|███████▎  | 14/19 [00:03<00:00,  5.42it/s][A
 84%|████████▍ | 16/19 [00:03<00:00,  7.02it/s][A
 89%|████████▉ | 17/19 [00:03<00:00,  4.94it/s][A
100%|██████████| 19/19 [00:04<00:00,  4.58it/s][A
Epochs:  36%|███▌      | 21/59 [09:18<20:24, 32.23s/it]

Val Loss: 3.8954 - Val Accuracy: 0.9291



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:01<02:58,  1.20s/it][A
Training:   2%|▏         | 3/149 [00:01<00:51,  2.86it/s][A
Training:   3%|▎         | 5/149 [00:01<00:45,  3.14it/s][A
Training:   5%|▍         | 7/149 [00:02<00:29,  4.73it/s][A
Training:   6%|▌         | 9/149 [00:02<00:31,  4.42it/s][A
Training:   7%|▋         | 10/149 [00:02<00:27,  4.99it/s][A
Training:   8%|▊         | 12/149 [00:02<00:20,  6.71it/s][A
Training:   9%|▉         | 14/149 [00:03<00:28,  4.81it/s][A
Training:  11%|█▏        | 17/149 [00:03<00:25,  5.09it/s][A
Training:  12%|█▏        | 18/149 [00:04<00:24,  5.38it/s][A
Training:  14%|█▍        | 21/149 [00:04<00:20,  6.23it/s][A
Training:  15%|█▍        | 22/149 [00:04<00:21,  5.81it/s][A
Training:  17%|█▋        | 25/149 [00:05<00:20,  5.99it/s][A
Training:  17%|█▋        | 26/149 [00:05<00:21,  5.85it/s][A
Training:  19%|█▉        | 28/149 [00:05<00:16,  7.50it/s][A
Training:  19%|█▉    

Epoch: 22/59 - Loss: 3.5713 - Accuracy: 0.9342



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:01<00:19,  1.07s/it][A
 16%|█▌        | 3/19 [00:01<00:05,  3.09it/s][A
 26%|██▋       | 5/19 [00:01<00:03,  3.52it/s][A
 32%|███▏      | 6/19 [00:01<00:03,  4.20it/s][A
 42%|████▏     | 8/19 [00:01<00:01,  5.87it/s][A
 47%|████▋     | 9/19 [00:02<00:02,  3.99it/s][A
 53%|█████▎    | 10/19 [00:02<00:01,  4.60it/s][A
 63%|██████▎   | 12/19 [00:02<00:01,  6.22it/s][A
 68%|██████▊   | 13/19 [00:03<00:01,  4.47it/s][A
 74%|███████▎  | 14/19 [00:03<00:00,  5.02it/s][A
 84%|████████▍ | 16/19 [00:03<00:00,  6.53it/s][A
 89%|████████▉ | 17/19 [00:03<00:00,  4.78it/s][A
100%|██████████| 19/19 [00:04<00:00,  4.53it/s][A
Epochs:  37%|███▋      | 22/59 [09:49<19:41, 31.92s/it]

Val Loss: 3.9715 - Val Accuracy: 0.9297



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:01<03:04,  1.25s/it][A
Training:   3%|▎         | 4/149 [00:01<00:39,  3.64it/s][A
Training:   4%|▍         | 6/149 [00:02<00:44,  3.21it/s][A
Training:   6%|▌         | 9/149 [00:02<00:36,  3.80it/s][A
Training:   7%|▋         | 10/149 [00:02<00:33,  4.17it/s][A
Training:   8%|▊         | 12/149 [00:03<00:24,  5.60it/s][A
Training:   9%|▊         | 13/149 [00:03<00:31,  4.33it/s][A
Training:   9%|▉         | 14/149 [00:03<00:28,  4.79it/s][A
Training:  11%|█         | 16/149 [00:03<00:20,  6.65it/s][A
Training:  12%|█▏        | 18/149 [00:04<00:28,  4.57it/s][A
Training:  13%|█▎        | 20/149 [00:04<00:21,  6.05it/s][A
Training:  15%|█▍        | 22/149 [00:05<00:28,  4.49it/s][A
Training:  17%|█▋        | 25/149 [00:05<00:20,  5.98it/s][A
Training:  17%|█▋        | 26/149 [00:05<00:27,  4.46it/s][A
Training:  19%|█▉        | 29/149 [00:06<00:20,  5.82it/s][A
Training:  20%|██   

Epoch: 23/59 - Loss: 3.4245 - Accuracy: 0.9371



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:01<00:21,  1.22s/it][A
 26%|██▋       | 5/19 [00:01<00:04,  3.03it/s][A
 32%|███▏      | 6/19 [00:02<00:03,  3.59it/s][A
 42%|████▏     | 8/19 [00:02<00:02,  5.22it/s][A
 53%|█████▎    | 10/19 [00:02<00:02,  4.10it/s][A
 68%|██████▊   | 13/19 [00:03<00:01,  4.74it/s][A
100%|██████████| 19/19 [00:04<00:00,  4.49it/s][A
Epochs:  39%|███▉      | 23/59 [10:21<19:07, 31.89s/it]

Val Loss: 4.2846 - Val Accuracy: 0.9256



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:01<03:18,  1.34s/it][A
Training:   2%|▏         | 3/149 [00:01<00:56,  2.59it/s][A
Training:   3%|▎         | 5/149 [00:01<00:43,  3.31it/s][A
Training:   6%|▌         | 9/149 [00:02<00:30,  4.62it/s][A
Training:   7%|▋         | 11/149 [00:02<00:23,  5.85it/s][A
Training:   9%|▊         | 13/149 [00:03<00:28,  4.79it/s][A
Training:  10%|█         | 15/149 [00:03<00:23,  5.79it/s][A
Training:  11%|█▏        | 17/149 [00:03<00:25,  5.15it/s][A
Training:  13%|█▎        | 19/149 [00:04<00:19,  6.58it/s][A
Training:  14%|█▍        | 21/149 [00:04<00:22,  5.75it/s][A
Training:  15%|█▌        | 23/149 [00:04<00:18,  6.94it/s][A
Training:  17%|█▋        | 25/149 [00:05<00:21,  5.69it/s][A
Training:  17%|█▋        | 26/149 [00:05<00:22,  5.50it/s][A
Training:  18%|█▊        | 27/149 [00:05<00:20,  6.03it/s][A
Training:  19%|█▉        | 29/149 [00:05<00:20,  5.86it/s][A
Training:  20%|██   

Epoch: 24/59 - Loss: 3.4763 - Accuracy: 0.9343



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:01<00:18,  1.03s/it][A
 11%|█         | 2/19 [00:01<00:08,  1.95it/s][A
 21%|██        | 4/19 [00:01<00:03,  4.46it/s][A
 32%|███▏      | 6/19 [00:01<00:03,  4.07it/s][A
 47%|████▋     | 9/19 [00:02<00:02,  4.97it/s][A
 53%|█████▎    | 10/19 [00:02<00:01,  4.84it/s][A
 68%|██████▊   | 13/19 [00:02<00:01,  5.60it/s][A
 74%|███████▎  | 14/19 [00:03<00:01,  4.51it/s][A
 89%|████████▉ | 17/19 [00:03<00:00,  5.67it/s][A
100%|██████████| 19/19 [00:04<00:00,  4.54it/s][A
Epochs:  41%|████      | 24/59 [10:53<18:39, 31.98s/it]

Val Loss: 4.3203 - Val Accuracy: 0.9229



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:01<03:32,  1.44s/it][A
Training:   3%|▎         | 5/149 [00:02<00:50,  2.87it/s][A
Training:   5%|▍         | 7/149 [00:02<00:34,  4.16it/s][A
Training:   6%|▌         | 9/149 [00:02<00:33,  4.14it/s][A
Training:   7%|▋         | 11/149 [00:02<00:24,  5.62it/s][A
Training:   9%|▊         | 13/149 [00:03<00:24,  5.55it/s][A
Training:  11%|█         | 16/149 [00:03<00:16,  8.06it/s][A
Training:  12%|█▏        | 18/149 [00:03<00:20,  6.45it/s][A
Training:  13%|█▎        | 20/149 [00:03<00:17,  7.58it/s][A
Training:  15%|█▍        | 22/149 [00:04<00:19,  6.41it/s][A
Training:  15%|█▌        | 23/149 [00:04<00:20,  6.04it/s][A
Training:  17%|█▋        | 25/149 [00:04<00:18,  6.85it/s][A
Training:  17%|█▋        | 26/149 [00:04<00:18,  6.61it/s][A
Training:  18%|█▊        | 27/149 [00:05<00:22,  5.33it/s][A
Training:  19%|█▉        | 29/149 [00:05<00:19,  6.13it/s][A
Training:  20%|██   

Epoch: 25/59 - Loss: 3.6165 - Accuracy: 0.9337



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:01<00:25,  1.39s/it][A
 21%|██        | 4/19 [00:01<00:04,  3.34it/s][A
 32%|███▏      | 6/19 [00:02<00:03,  3.28it/s][A
 42%|████▏     | 8/19 [00:02<00:02,  4.79it/s][A
 53%|█████▎    | 10/19 [00:02<00:02,  4.47it/s][A
 68%|██████▊   | 13/19 [00:03<00:01,  4.71it/s][A
 84%|████████▍ | 16/19 [00:03<00:00,  6.88it/s][A
100%|██████████| 19/19 [00:04<00:00,  4.56it/s][A
Epochs:  42%|████▏     | 25/59 [11:25<18:03, 31.87s/it]

Val Loss: 3.8413 - Val Accuracy: 0.9297



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:01<03:23,  1.38s/it][A
Training:   2%|▏         | 3/149 [00:01<00:58,  2.49it/s][A
Training:   3%|▎         | 5/149 [00:02<00:51,  2.82it/s][A
Training:   5%|▍         | 7/149 [00:02<00:33,  4.28it/s][A
Training:   6%|▌         | 9/149 [00:02<00:35,  3.99it/s][A
Training:   8%|▊         | 12/149 [00:02<00:21,  6.26it/s][A
Training:   9%|▉         | 14/149 [00:03<00:27,  4.88it/s][A
Training:  11%|█▏        | 17/149 [00:04<00:25,  5.13it/s][A
Training:  14%|█▍        | 21/149 [00:04<00:22,  5.64it/s][A
Training:  15%|█▌        | 23/149 [00:04<00:18,  6.75it/s][A
Training:  17%|█▋        | 25/149 [00:05<00:19,  6.32it/s][A
Training:  18%|█▊        | 27/149 [00:05<00:16,  7.55it/s][A
Training:  19%|█▉        | 29/149 [00:05<00:21,  5.61it/s][A
Training:  20%|██        | 30/149 [00:06<00:19,  5.96it/s][A
Training:  22%|██▏       | 33/149 [00:06<00:19,  5.82it/s][A
Training:  23%|██▎   

Epoch: 26/59 - Loss: 3.3596 - Accuracy: 0.9366



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:01<00:21,  1.21s/it][A
 11%|█         | 2/19 [00:01<00:10,  1.65it/s][A
 26%|██▋       | 5/19 [00:01<00:04,  3.36it/s][A
 32%|███▏      | 6/19 [00:02<00:03,  3.71it/s][A
 47%|████▋     | 9/19 [00:02<00:02,  4.03it/s][A
 63%|██████▎   | 12/19 [00:02<00:01,  6.04it/s][A
 68%|██████▊   | 13/19 [00:03<00:01,  4.84it/s][A
 84%|████████▍ | 16/19 [00:03<00:00,  6.61it/s][A
100%|██████████| 19/19 [00:04<00:00,  4.50it/s][A
Epochs:  44%|████▍     | 26/59 [11:57<17:34, 31.95s/it]

Val Loss: 3.8324 - Val Accuracy: 0.9301



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:01<03:57,  1.60s/it][A
Training:   3%|▎         | 4/149 [00:01<00:50,  2.85it/s][A
Training:   4%|▍         | 6/149 [00:02<00:42,  3.35it/s][A
Training:   6%|▌         | 9/149 [00:02<00:34,  4.11it/s][A
Training:   9%|▊         | 13/149 [00:03<00:26,  5.09it/s][A
Training:   9%|▉         | 14/149 [00:03<00:24,  5.48it/s][A
Training:  11%|█         | 16/149 [00:03<00:20,  6.61it/s][A
Training:  11%|█▏        | 17/149 [00:04<00:26,  5.04it/s][A
Training:  12%|█▏        | 18/149 [00:04<00:27,  4.83it/s][A
Training:  14%|█▍        | 21/149 [00:04<00:20,  6.16it/s][A
Training:  15%|█▍        | 22/149 [00:04<00:23,  5.44it/s][A
Training:  16%|█▌        | 24/149 [00:05<00:18,  6.80it/s][A
Training:  17%|█▋        | 25/149 [00:05<00:18,  6.54it/s][A
Training:  17%|█▋        | 26/149 [00:05<00:23,  5.16it/s][A
Training:  19%|█▉        | 28/149 [00:05<00:18,  6.54it/s][A
Training:  19%|█▉   

Epoch: 27/59 - Loss: 3.3472 - Accuracy: 0.9369



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:01<00:24,  1.33s/it][A
 21%|██        | 4/19 [00:01<00:04,  3.43it/s][A
 32%|███▏      | 6/19 [00:02<00:03,  3.38it/s][A
 42%|████▏     | 8/19 [00:02<00:02,  4.44it/s][A
 47%|████▋     | 9/19 [00:02<00:02,  3.79it/s][A
 53%|█████▎    | 10/19 [00:02<00:02,  4.07it/s][A
 63%|██████▎   | 12/19 [00:03<00:01,  5.32it/s][A
 68%|██████▊   | 13/19 [00:03<00:01,  4.97it/s][A
 74%|███████▎  | 14/19 [00:03<00:01,  4.91it/s][A
 84%|████████▍ | 16/19 [00:03<00:00,  5.56it/s][A
100%|██████████| 19/19 [00:04<00:00,  4.41it/s][A
Epochs:  46%|████▌     | 27/59 [12:30<17:10, 32.22s/it]

Val Loss: 3.8497 - Val Accuracy: 0.9306



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:01<03:14,  1.31s/it][A
Training:   1%|▏         | 2/149 [00:01<01:35,  1.54it/s][A
Training:   3%|▎         | 4/149 [00:01<00:39,  3.64it/s][A
Training:   4%|▍         | 6/149 [00:02<00:39,  3.61it/s][A
Training:   5%|▍         | 7/149 [00:02<00:33,  4.27it/s][A
Training:   5%|▌         | 8/149 [00:02<00:28,  5.01it/s][A
Training:   6%|▌         | 9/149 [00:02<00:37,  3.78it/s][A
Training:   7%|▋         | 10/149 [00:02<00:31,  4.42it/s][A
Training:   9%|▊         | 13/149 [00:03<00:28,  4.79it/s][A
Training:   9%|▉         | 14/149 [00:03<00:25,  5.21it/s][A
Training:  11%|█▏        | 17/149 [00:04<00:21,  6.06it/s][A
Training:  13%|█▎        | 19/149 [00:04<00:17,  7.44it/s][A
Training:  14%|█▍        | 21/149 [00:04<00:23,  5.41it/s][A
Training:  15%|█▌        | 23/149 [00:04<00:18,  6.90it/s][A
Training:  17%|█▋        | 25/149 [00:05<00:21,  5.66it/s][A
Training:  18%|█▊      

Epoch: 28/59 - Loss: 3.4082 - Accuracy: 0.9372



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:01<00:25,  1.42s/it][A
 21%|██        | 4/19 [00:01<00:04,  3.21it/s][A
 32%|███▏      | 6/19 [00:02<00:03,  3.27it/s][A
 47%|████▋     | 9/19 [00:02<00:02,  3.58it/s][A
 63%|██████▎   | 12/19 [00:03<00:01,  5.48it/s][A
 74%|███████▎  | 14/19 [00:03<00:01,  4.64it/s][A
 84%|████████▍ | 16/19 [00:03<00:00,  5.94it/s][A
100%|██████████| 19/19 [00:04<00:00,  4.27it/s][A
Epochs:  47%|████▋     | 28/59 [13:03<16:44, 32.41s/it]

Val Loss: 3.8602 - Val Accuracy: 0.9267



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:01<02:52,  1.17s/it][A
Training:   1%|▏         | 2/149 [00:01<01:21,  1.81it/s][A
Training:   2%|▏         | 3/149 [00:01<00:52,  2.76it/s][A
Training:   3%|▎         | 5/149 [00:01<00:37,  3.84it/s][A
Training:   4%|▍         | 6/149 [00:01<00:33,  4.32it/s][A
Training:   5%|▍         | 7/149 [00:02<00:34,  4.16it/s][A
Training:   6%|▌         | 9/149 [00:02<00:30,  4.66it/s][A
Training:   7%|▋         | 11/149 [00:02<00:28,  4.91it/s][A
Training:   9%|▊         | 13/149 [00:03<00:23,  5.73it/s][A
Training:  10%|█         | 15/149 [00:03<00:22,  5.91it/s][A
Training:  11%|█▏        | 17/149 [00:03<00:21,  6.16it/s][A
Training:  13%|█▎        | 19/149 [00:04<00:20,  6.22it/s][A
Training:  14%|█▍        | 21/149 [00:04<00:18,  6.75it/s][A
Training:  15%|█▍        | 22/149 [00:04<00:17,  7.06it/s][A
Training:  15%|█▌        | 23/149 [00:04<00:21,  5.88it/s][A
Training:  17%|█▋      

Epoch: 29/59 - Loss: 3.3234 - Accuracy: 0.9367



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:01<00:21,  1.22s/it][A
 16%|█▌        | 3/19 [00:01<00:05,  2.75it/s][A
 26%|██▋       | 5/19 [00:02<00:05,  2.73it/s][A
 42%|████▏     | 8/19 [00:02<00:02,  4.83it/s][A
 53%|█████▎    | 10/19 [00:02<00:02,  4.32it/s][A
 63%|██████▎   | 12/19 [00:03<00:01,  5.24it/s][A
 68%|██████▊   | 13/19 [00:03<00:01,  4.02it/s][A
 84%|████████▍ | 16/19 [00:03<00:00,  6.25it/s][A
100%|██████████| 19/19 [00:04<00:00,  4.42it/s][A
Epochs:  49%|████▉     | 29/59 [13:35<16:12, 32.41s/it]

Val Loss: 4.7566 - Val Accuracy: 0.9155



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:01<02:52,  1.17s/it][A
Training:   3%|▎         | 5/149 [00:01<00:46,  3.11it/s][A
Training:   5%|▍         | 7/149 [00:02<00:31,  4.44it/s][A
Training:   6%|▌         | 9/149 [00:02<00:31,  4.42it/s][A
Training:   8%|▊         | 12/149 [00:02<00:19,  6.96it/s][A
Training:   9%|▉         | 14/149 [00:03<00:24,  5.56it/s][A
Training:  11%|█▏        | 17/149 [00:03<00:23,  5.64it/s][A
Training:  13%|█▎        | 20/149 [00:03<00:16,  7.64it/s][A
Training:  15%|█▍        | 22/149 [00:04<00:21,  5.82it/s][A
Training:  17%|█▋        | 25/149 [00:04<00:21,  5.69it/s][A
Training:  19%|█▉        | 28/149 [00:05<00:15,  7.60it/s][A
Training:  20%|██        | 30/149 [00:05<00:21,  5.66it/s][A
Training:  21%|██▏       | 32/149 [00:05<00:17,  6.80it/s][A
Training:  23%|██▎       | 34/149 [00:06<00:22,  5.21it/s][A
Training:  24%|██▍       | 36/149 [00:06<00:17,  6.49it/s][A
Training:  26%|██▌  

Epoch: 30/59 - Loss: 3.3957 - Accuracy: 0.9361



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:01<00:25,  1.41s/it][A
 21%|██        | 4/19 [00:01<00:04,  3.32it/s][A
 32%|███▏      | 6/19 [00:02<00:03,  3.55it/s][A
 37%|███▋      | 7/19 [00:02<00:02,  4.02it/s][A
 47%|████▋     | 9/19 [00:02<00:02,  3.68it/s][A
 63%|██████▎   | 12/19 [00:02<00:01,  5.80it/s][A
 74%|███████▎  | 14/19 [00:03<00:01,  4.28it/s][A
100%|██████████| 19/19 [00:04<00:00,  4.17it/s][A
Epochs:  51%|█████     | 30/59 [14:07<15:30, 32.10s/it]

Val Loss: 3.9463 - Val Accuracy: 0.9269



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:01<02:36,  1.06s/it][A
Training:   3%|▎         | 4/149 [00:01<00:34,  4.23it/s][A
Training:   4%|▍         | 6/149 [00:01<00:38,  3.69it/s][A
Training:   5%|▌         | 8/149 [00:02<00:28,  4.93it/s][A
Training:   6%|▌         | 9/149 [00:02<00:34,  4.06it/s][A
Training:   8%|▊         | 12/149 [00:02<00:27,  5.01it/s][A
Training:   9%|▊         | 13/149 [00:03<00:30,  4.52it/s][A
Training:  10%|█         | 15/149 [00:03<00:22,  5.94it/s][A
Training:  11%|█         | 16/149 [00:03<00:24,  5.41it/s][A
Training:  11%|█▏        | 17/149 [00:03<00:29,  4.44it/s][A
Training:  13%|█▎        | 20/149 [00:04<00:20,  6.31it/s][A
Training:  14%|█▍        | 21/149 [00:04<00:25,  5.04it/s][A
Training:  16%|█▌        | 24/149 [00:04<00:17,  7.02it/s][A
Training:  17%|█▋        | 25/149 [00:05<00:22,  5.50it/s][A
Training:  18%|█▊        | 27/149 [00:05<00:17,  6.98it/s][A
Training:  19%|█▉    

Epoch: 31/59 - Loss: 3.3387 - Accuracy: 0.9357



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:01<00:18,  1.00s/it][A
 11%|█         | 2/19 [00:01<00:09,  1.80it/s][A
 16%|█▌        | 3/19 [00:01<00:06,  2.59it/s][A
 26%|██▋       | 5/19 [00:01<00:03,  3.91it/s][A
 32%|███▏      | 6/19 [00:01<00:03,  3.91it/s][A
 37%|███▋      | 7/19 [00:02<00:02,  4.58it/s][A
 47%|████▋     | 9/19 [00:02<00:01,  5.15it/s][A
 53%|█████▎    | 10/19 [00:02<00:01,  4.55it/s][A
 58%|█████▊    | 11/19 [00:03<00:01,  4.41it/s][A
 74%|███████▎  | 14/19 [00:03<00:01,  4.94it/s][A
 84%|████████▍ | 16/19 [00:03<00:00,  6.53it/s][A
 89%|████████▉ | 17/19 [00:03<00:00,  6.05it/s][A
100%|██████████| 19/19 [00:04<00:00,  4.35it/s][A
Epochs:  53%|█████▎    | 31/59 [14:39<15:00, 32.15s/it]

Val Loss: 3.9406 - Val Accuracy: 0.9274



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:01<02:28,  1.00s/it][A
Training:   2%|▏         | 3/149 [00:01<00:46,  3.16it/s][A
Training:   3%|▎         | 5/149 [00:01<00:41,  3.48it/s][A
Training:   5%|▍         | 7/149 [00:01<00:27,  5.18it/s][A
Training:   6%|▌         | 9/149 [00:02<00:34,  4.03it/s][A
Training:   7%|▋         | 10/149 [00:02<00:30,  4.57it/s][A
Training:   8%|▊         | 12/149 [00:02<00:22,  6.10it/s][A
Training:   9%|▊         | 13/149 [00:03<00:34,  3.97it/s][A
Training:  10%|█         | 15/149 [00:03<00:24,  5.45it/s][A
Training:  11%|█▏        | 17/149 [00:04<00:29,  4.47it/s][A
Training:  13%|█▎        | 20/149 [00:04<00:19,  6.76it/s][A
Training:  15%|█▍        | 22/149 [00:04<00:24,  5.27it/s][A
Training:  17%|█▋        | 25/149 [00:05<00:23,  5.37it/s][A
Training:  19%|█▉        | 29/149 [00:05<00:20,  5.75it/s][A
Training:  22%|██▏       | 33/149 [00:06<00:19,  5.93it/s][A
Training:  23%|██▎   

Epoch: 32/59 - Loss: 3.4898 - Accuracy: 0.9341



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:01<00:24,  1.37s/it][A
 16%|█▌        | 3/19 [00:01<00:06,  2.55it/s][A
 26%|██▋       | 5/19 [00:02<00:05,  2.64it/s][A
 47%|████▋     | 9/19 [00:02<00:02,  4.01it/s][A
 58%|█████▊    | 11/19 [00:03<00:01,  5.10it/s][A
 68%|██████▊   | 13/19 [00:03<00:01,  4.44it/s][A
 79%|███████▉  | 15/19 [00:03<00:00,  5.54it/s][A
100%|██████████| 19/19 [00:04<00:00,  4.21it/s][A
Epochs:  54%|█████▍    | 32/59 [15:11<14:25, 32.06s/it]

Val Loss: 3.8752 - Val Accuracy: 0.9323



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:01<03:12,  1.30s/it][A
Training:   3%|▎         | 4/149 [00:01<00:40,  3.55it/s][A
Training:   4%|▍         | 6/149 [00:01<00:36,  3.89it/s][A
Training:   6%|▌         | 9/149 [00:02<00:31,  4.45it/s][A
Training:   8%|▊         | 12/149 [00:02<00:21,  6.50it/s][A
Training:   9%|▉         | 14/149 [00:03<00:26,  5.05it/s][A
Training:  10%|█         | 15/149 [00:03<00:25,  5.26it/s][A
Training:  11%|█▏        | 17/149 [00:03<00:26,  4.95it/s][A
Training:  13%|█▎        | 19/149 [00:04<00:24,  5.40it/s][A
Training:  14%|█▍        | 21/149 [00:04<00:23,  5.36it/s][A
Training:  15%|█▌        | 23/149 [00:04<00:21,  5.77it/s][A
Training:  17%|█▋        | 25/149 [00:05<00:24,  5.10it/s][A
Training:  18%|█▊        | 27/149 [00:05<00:20,  5.81it/s][A
Training:  19%|█▉        | 28/149 [00:05<00:20,  6.01it/s][A
Training:  19%|█▉        | 29/149 [00:05<00:20,  5.78it/s][A
Training:  21%|██   

Epoch: 33/59 - Loss: 3.4554 - Accuracy: 0.9354



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:01<00:20,  1.14s/it][A
 16%|█▌        | 3/19 [00:01<00:05,  2.75it/s][A
 26%|██▋       | 5/19 [00:01<00:04,  3.31it/s][A
 37%|███▋      | 7/19 [00:02<00:02,  4.47it/s][A
 47%|████▋     | 9/19 [00:02<00:02,  4.33it/s][A
 58%|█████▊    | 11/19 [00:02<00:01,  5.09it/s][A
 68%|██████▊   | 13/19 [00:03<00:01,  5.13it/s][A
 79%|███████▉  | 15/19 [00:03<00:00,  5.97it/s][A
100%|██████████| 19/19 [00:03<00:00,  4.79it/s][A
Epochs:  56%|█████▌    | 33/59 [15:42<13:49, 31.90s/it]

Val Loss: 3.8861 - Val Accuracy: 0.9302



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:01<02:40,  1.09s/it][A
Training:   1%|▏         | 2/149 [00:01<01:15,  1.95it/s][A
Training:   3%|▎         | 5/149 [00:01<00:36,  3.95it/s][A
Training:   5%|▍         | 7/149 [00:01<00:24,  5.77it/s][A
Training:   6%|▌         | 9/149 [00:02<00:30,  4.64it/s][A
Training:   8%|▊         | 12/149 [00:02<00:20,  6.77it/s][A
Training:   9%|▉         | 14/149 [00:02<00:21,  6.30it/s][A
Training:  11%|█         | 16/149 [00:03<00:18,  7.37it/s][A
Training:  12%|█▏        | 18/149 [00:03<00:20,  6.44it/s][A
Training:  13%|█▎        | 20/149 [00:03<00:19,  6.75it/s][A
Training:  14%|█▍        | 21/149 [00:03<00:19,  6.46it/s][A
Training:  15%|█▍        | 22/149 [00:04<00:19,  6.57it/s][A
Training:  16%|█▌        | 24/149 [00:04<00:20,  6.17it/s][A
Training:  17%|█▋        | 25/149 [00:04<00:19,  6.24it/s][A
Training:  18%|█▊        | 27/149 [00:04<00:17,  7.04it/s][A
Training:  19%|█▉    

Epoch: 34/59 - Loss: 3.1899 - Accuracy: 0.9385



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:01<00:23,  1.33s/it][A
 26%|██▋       | 5/19 [00:01<00:04,  3.10it/s][A
 32%|███▏      | 6/19 [00:02<00:03,  3.57it/s][A
 47%|████▋     | 9/19 [00:02<00:02,  4.08it/s][A
 58%|█████▊    | 11/19 [00:02<00:01,  5.42it/s][A
 68%|██████▊   | 13/19 [00:03<00:01,  4.03it/s][A
 84%|████████▍ | 16/19 [00:03<00:00,  6.17it/s][A
100%|██████████| 19/19 [00:04<00:00,  4.32it/s][A
Epochs:  58%|█████▊    | 34/59 [16:13<13:11, 31.67s/it]

Val Loss: 3.8703 - Val Accuracy: 0.9259



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:01<02:51,  1.16s/it][A
Training:   2%|▏         | 3/149 [00:01<00:49,  2.94it/s][A
Training:   3%|▎         | 5/149 [00:01<00:39,  3.65it/s][A
Training:   5%|▌         | 8/149 [00:01<00:24,  5.75it/s][A
Training:   6%|▌         | 9/149 [00:02<00:29,  4.67it/s][A
Training:   7%|▋         | 11/149 [00:02<00:23,  5.93it/s][A
Training:   8%|▊         | 12/149 [00:02<00:23,  5.84it/s][A
Training:   9%|▊         | 13/149 [00:02<00:24,  5.49it/s][A
Training:   9%|▉         | 14/149 [00:03<00:26,  5.01it/s][A
Training:  11%|█         | 16/149 [00:03<00:23,  5.77it/s][A
Training:  11%|█▏        | 17/149 [00:03<00:22,  5.97it/s][A
Training:  12%|█▏        | 18/149 [00:03<00:25,  5.21it/s][A
Training:  13%|█▎        | 20/149 [00:04<00:21,  6.08it/s][A
Training:  15%|█▍        | 22/149 [00:04<00:24,  5.29it/s][A
Training:  16%|█▌        | 24/149 [00:04<00:19,  6.36it/s][A
Training:  17%|█▋    

Epoch: 35/59 - Loss: 3.1723 - Accuracy: 0.9398



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:01<00:18,  1.00s/it][A
 11%|█         | 2/19 [00:01<00:08,  1.98it/s][A
 21%|██        | 4/19 [00:01<00:03,  4.51it/s][A
 32%|███▏      | 6/19 [00:01<00:03,  3.67it/s][A
 42%|████▏     | 8/19 [00:02<00:02,  5.32it/s][A
 53%|█████▎    | 10/19 [00:02<00:02,  4.15it/s][A
 63%|██████▎   | 12/19 [00:02<00:01,  5.51it/s][A
 74%|███████▎  | 14/19 [00:03<00:01,  4.24it/s][A
 84%|████████▍ | 16/19 [00:03<00:00,  5.61it/s][A
100%|██████████| 19/19 [00:04<00:00,  4.45it/s][A
Epochs:  59%|█████▉    | 35/59 [16:45<12:38, 31.59s/it]

Val Loss: 3.8980 - Val Accuracy: 0.9313



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:01<02:44,  1.11s/it][A
Training:   2%|▏         | 3/149 [00:01<00:48,  3.02it/s][A
Training:   3%|▎         | 5/149 [00:01<00:41,  3.49it/s][A
Training:   4%|▍         | 6/149 [00:01<00:33,  4.23it/s][A
Training:   5%|▌         | 8/149 [00:02<00:24,  5.74it/s][A
Training:   6%|▌         | 9/149 [00:02<00:32,  4.35it/s][A
Training:   7%|▋         | 11/149 [00:02<00:25,  5.40it/s][A
Training:   8%|▊         | 12/149 [00:02<00:23,  5.75it/s][A
Training:   9%|▊         | 13/149 [00:03<00:30,  4.51it/s][A
Training:   9%|▉         | 14/149 [00:03<00:28,  4.69it/s][A
Training:  11%|█         | 16/149 [00:03<00:23,  5.55it/s][A
Training:  11%|█▏        | 17/149 [00:03<00:29,  4.53it/s][A
Training:  12%|█▏        | 18/149 [00:04<00:29,  4.40it/s][A
Training:  13%|█▎        | 20/149 [00:04<00:21,  6.06it/s][A
Training:  14%|█▍        | 21/149 [00:04<00:25,  5.07it/s][A
Training:  15%|█▍     

Epoch: 36/59 - Loss: 3.2199 - Accuracy: 0.9397



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:01<00:22,  1.27s/it][A
 21%|██        | 4/19 [00:01<00:04,  3.71it/s][A
 32%|███▏      | 6/19 [00:02<00:03,  3.45it/s][A
 42%|████▏     | 8/19 [00:02<00:02,  4.95it/s][A
 53%|█████▎    | 10/19 [00:02<00:02,  3.78it/s][A
 63%|██████▎   | 12/19 [00:03<00:01,  5.17it/s][A
 74%|███████▎  | 14/19 [00:03<00:01,  4.38it/s][A
 79%|███████▉  | 15/19 [00:03<00:00,  4.67it/s][A
100%|██████████| 19/19 [00:04<00:00,  4.34it/s][A
Epochs:  61%|██████    | 36/59 [17:17<12:09, 31.73s/it]

Val Loss: 4.0206 - Val Accuracy: 0.9257



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:01<02:56,  1.19s/it][A
Training:   1%|▏         | 2/149 [00:01<01:23,  1.76it/s][A
Training:   3%|▎         | 4/149 [00:01<00:39,  3.70it/s][A
Training:   3%|▎         | 5/149 [00:01<00:43,  3.29it/s][A
Training:   4%|▍         | 6/149 [00:02<00:34,  4.09it/s][A
Training:   5%|▌         | 8/149 [00:02<00:27,  5.10it/s][A
Training:   6%|▌         | 9/149 [00:02<00:32,  4.30it/s][A
Training:   7%|▋         | 10/149 [00:02<00:32,  4.33it/s][A
Training:   8%|▊         | 12/149 [00:03<00:22,  5.99it/s][A
Training:   9%|▊         | 13/149 [00:03<00:23,  5.74it/s][A
Training:   9%|▉         | 14/149 [00:03<00:27,  4.92it/s][A
Training:  11%|█         | 16/149 [00:03<00:21,  6.26it/s][A
Training:  11%|█▏        | 17/149 [00:03<00:21,  6.27it/s][A
Training:  12%|█▏        | 18/149 [00:04<00:27,  4.76it/s][A
Training:  13%|█▎        | 20/149 [00:04<00:22,  5.82it/s][A
Training:  14%|█▍      

Epoch: 37/59 - Loss: 3.2656 - Accuracy: 0.9381



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:01<00:22,  1.24s/it][A
 16%|█▌        | 3/19 [00:01<00:06,  2.59it/s][A
 26%|██▋       | 5/19 [00:01<00:04,  3.26it/s][A
 37%|███▋      | 7/19 [00:01<00:02,  5.03it/s][A
 47%|████▋     | 9/19 [00:02<00:02,  4.18it/s][A
 58%|█████▊    | 11/19 [00:02<00:01,  5.10it/s][A
 68%|██████▊   | 13/19 [00:03<00:01,  4.84it/s][A
 74%|███████▎  | 14/19 [00:03<00:01,  4.73it/s][A
 89%|████████▉ | 17/19 [00:04<00:00,  4.90it/s][A
100%|██████████| 19/19 [00:04<00:00,  4.20it/s][A
Epochs:  63%|██████▎   | 37/59 [17:49<11:41, 31.89s/it]

Val Loss: 3.9265 - Val Accuracy: 0.9301



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:01<02:35,  1.05s/it][A
Training:   1%|▏         | 2/149 [00:01<01:18,  1.86it/s][A
Training:   3%|▎         | 4/149 [00:01<00:33,  4.27it/s][A
Training:   4%|▍         | 6/149 [00:01<00:32,  4.35it/s][A
Training:   6%|▌         | 9/149 [00:02<00:26,  5.28it/s][A
Training:   7%|▋         | 10/149 [00:02<00:25,  5.52it/s][A
Training:   7%|▋         | 11/149 [00:02<00:23,  5.92it/s][A
Training:   9%|▊         | 13/149 [00:02<00:24,  5.65it/s][A
Training:   9%|▉         | 14/149 [00:03<00:22,  5.93it/s][A
Training:  11%|█▏        | 17/149 [00:03<00:21,  6.08it/s][A
Training:  12%|█▏        | 18/149 [00:03<00:21,  6.03it/s][A
Training:  13%|█▎        | 20/149 [00:03<00:17,  7.53it/s][A
Training:  14%|█▍        | 21/149 [00:04<00:19,  6.53it/s][A
Training:  15%|█▍        | 22/149 [00:04<00:23,  5.44it/s][A
Training:  15%|█▌        | 23/149 [00:04<00:22,  5.66it/s][A
Training:  17%|█▋    

Epoch: 38/59 - Loss: 3.2389 - Accuracy: 0.9380



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:01<00:21,  1.21s/it][A
 16%|█▌        | 3/19 [00:01<00:06,  2.66it/s][A
 26%|██▋       | 5/19 [00:01<00:04,  2.99it/s][A
 42%|████▏     | 8/19 [00:02<00:02,  5.31it/s][A
 53%|█████▎    | 10/19 [00:02<00:01,  4.57it/s][A
 58%|█████▊    | 11/19 [00:02<00:01,  4.74it/s][A
 68%|██████▊   | 13/19 [00:03<00:01,  4.66it/s][A
 79%|███████▉  | 15/19 [00:03<00:00,  5.38it/s][A
100%|██████████| 19/19 [00:04<00:00,  4.43it/s][A
Epochs:  64%|██████▍   | 38/59 [18:20<11:03, 31.59s/it]

Val Loss: 3.9126 - Val Accuracy: 0.9252



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:00<02:21,  1.05it/s][A
Training:   1%|▏         | 2/149 [00:01<01:20,  1.83it/s][A
Training:   2%|▏         | 3/149 [00:01<00:52,  2.76it/s][A
Training:   3%|▎         | 4/149 [00:01<00:38,  3.73it/s][A
Training:   3%|▎         | 5/149 [00:01<00:30,  4.65it/s][A
Training:   4%|▍         | 6/149 [00:01<00:37,  3.77it/s][A
Training:   5%|▌         | 8/149 [00:02<00:22,  6.19it/s][A
Training:   6%|▌         | 9/149 [00:02<00:30,  4.60it/s][A
Training:   7%|▋         | 10/149 [00:02<00:36,  3.80it/s][A
Training:   8%|▊         | 12/149 [00:02<00:24,  5.68it/s][A
Training:   9%|▊         | 13/149 [00:03<00:25,  5.29it/s][A
Training:   9%|▉         | 14/149 [00:03<00:31,  4.23it/s][A
Training:  11%|█▏        | 17/149 [00:03<00:23,  5.51it/s][A
Training:  12%|█▏        | 18/149 [00:04<00:25,  5.07it/s][A
Training:  13%|█▎        | 20/149 [00:04<00:20,  6.40it/s][A
Training:  14%|█▍       

Epoch: 39/59 - Loss: 3.2035 - Accuracy: 0.9381



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:01<00:21,  1.17s/it][A
 16%|█▌        | 3/19 [00:01<00:05,  2.84it/s][A
 26%|██▋       | 5/19 [00:01<00:04,  2.97it/s][A
 47%|████▋     | 9/19 [00:02<00:02,  4.13it/s][A
 63%|██████▎   | 12/19 [00:02<00:01,  6.17it/s][A
 74%|███████▎  | 14/19 [00:03<00:01,  4.78it/s][A
100%|██████████| 19/19 [00:04<00:00,  4.46it/s][A
Epochs:  66%|██████▌   | 39/59 [18:51<10:30, 31.53s/it]

Val Loss: 3.8587 - Val Accuracy: 0.9301



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:01<03:03,  1.24s/it][A
Training:   1%|▏         | 2/149 [00:01<01:24,  1.74it/s][A
Training:   3%|▎         | 4/149 [00:01<00:36,  3.93it/s][A
Training:   4%|▍         | 6/149 [00:02<00:38,  3.72it/s][A
Training:   5%|▌         | 8/149 [00:02<00:26,  5.26it/s][A
Training:   6%|▌         | 9/149 [00:02<00:35,  3.95it/s][A
Training:   9%|▊         | 13/149 [00:03<00:29,  4.59it/s][A
Training:   9%|▉         | 14/149 [00:03<00:27,  4.98it/s][A
Training:  11%|█         | 16/149 [00:03<00:21,  6.30it/s][A
Training:  11%|█▏        | 17/149 [00:04<00:29,  4.46it/s][A
Training:  12%|█▏        | 18/149 [00:04<00:27,  4.74it/s][A
Training:  14%|█▍        | 21/149 [00:05<00:28,  4.56it/s][A
Training:  15%|█▌        | 23/149 [00:05<00:22,  5.69it/s][A
Training:  17%|█▋        | 25/149 [00:05<00:25,  4.80it/s][A
Training:  18%|█▊        | 27/149 [00:05<00:20,  6.01it/s][A
Training:  19%|█▉     

Epoch: 40/59 - Loss: 3.2473 - Accuracy: 0.9373



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:01<00:19,  1.09s/it][A
 26%|██▋       | 5/19 [00:01<00:04,  3.41it/s][A
 47%|████▋     | 9/19 [00:02<00:02,  4.80it/s][A
 68%|██████▊   | 13/19 [00:02<00:01,  5.51it/s][A
100%|██████████| 19/19 [00:03<00:00,  5.27it/s][A
Epochs:  68%|██████▊   | 40/59 [19:21<09:50, 31.06s/it]

Val Loss: 3.9226 - Val Accuracy: 0.9284



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:01<02:34,  1.04s/it][A
Training:   3%|▎         | 4/149 [00:01<00:33,  4.36it/s][A
Training:   4%|▍         | 6/149 [00:01<00:35,  4.05it/s][A
Training:   6%|▌         | 9/149 [00:02<00:29,  4.81it/s][A
Training:   7%|▋         | 11/149 [00:02<00:22,  6.24it/s][A
Training:   9%|▊         | 13/149 [00:02<00:26,  5.09it/s][A
Training:  11%|█         | 16/149 [00:02<00:17,  7.45it/s][A
Training:  12%|█▏        | 18/149 [00:03<00:22,  5.95it/s][A
Training:  14%|█▍        | 21/149 [00:04<00:21,  5.96it/s][A
Training:  15%|█▍        | 22/149 [00:04<00:20,  6.14it/s][A
Training:  16%|█▌        | 24/149 [00:04<00:16,  7.60it/s][A
Training:  17%|█▋        | 26/149 [00:04<00:18,  6.68it/s][A
Training:  19%|█▉        | 29/149 [00:05<00:18,  6.44it/s][A
Training:  21%|██▏       | 32/149 [00:05<00:13,  8.78it/s][A
Training:  23%|██▎       | 34/149 [00:05<00:16,  6.97it/s][A
Training:  25%|██▍  

Epoch: 41/59 - Loss: 3.1720 - Accuracy: 0.9384



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:00<00:15,  1.14it/s][A
 21%|██        | 4/19 [00:01<00:03,  4.97it/s][A
 32%|███▏      | 6/19 [00:01<00:03,  4.09it/s][A
 47%|████▋     | 9/19 [00:02<00:01,  5.06it/s][A
 53%|█████▎    | 10/19 [00:02<00:01,  5.35it/s][A
 68%|██████▊   | 13/19 [00:02<00:01,  5.76it/s][A
 74%|███████▎  | 14/19 [00:02<00:00,  5.54it/s][A
 89%|████████▉ | 17/19 [00:03<00:00,  6.54it/s][A
100%|██████████| 19/19 [00:03<00:00,  5.52it/s][A
Epochs:  69%|██████▉   | 41/59 [19:47<08:51, 29.52s/it]

Val Loss: 4.2339 - Val Accuracy: 0.9242



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:01<02:33,  1.04s/it][A
Training:   2%|▏         | 3/149 [00:01<00:45,  3.21it/s][A
Training:   3%|▎         | 5/149 [00:01<00:37,  3.88it/s][A
Training:   5%|▌         | 8/149 [00:01<00:20,  7.00it/s][A
Training:   7%|▋         | 10/149 [00:02<00:22,  6.18it/s][A
Training:   9%|▊         | 13/149 [00:02<00:22,  6.01it/s][A
Training:  11%|█▏        | 17/149 [00:03<00:19,  6.67it/s][A
Training:  13%|█▎        | 20/149 [00:03<00:14,  8.69it/s][A
Training:  15%|█▍        | 22/149 [00:03<00:16,  7.87it/s][A
Training:  16%|█▌        | 24/149 [00:03<00:13,  9.15it/s][A
Training:  17%|█▋        | 26/149 [00:04<00:15,  7.97it/s][A
Training:  19%|█▉        | 29/149 [00:04<00:17,  6.81it/s][A
Training:  21%|██        | 31/149 [00:04<00:15,  7.44it/s][A
Training:  22%|██▏       | 33/149 [00:05<00:18,  6.36it/s][A
Training:  24%|██▍       | 36/149 [00:05<00:13,  8.65it/s][A
Training:  26%|██▌  

Epoch: 42/59 - Loss: 3.2804 - Accuracy: 0.9361



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:01<00:18,  1.04s/it][A
 21%|██        | 4/19 [00:01<00:03,  4.45it/s][A
 32%|███▏      | 6/19 [00:01<00:02,  4.47it/s][A
 47%|████▋     | 9/19 [00:02<00:02,  4.97it/s][A
 68%|██████▊   | 13/19 [00:02<00:01,  5.62it/s][A
 79%|███████▉  | 15/19 [00:02<00:00,  6.78it/s][A
100%|██████████| 19/19 [00:03<00:00,  5.49it/s][A
Epochs:  71%|███████   | 42/59 [20:13<08:01, 28.31s/it]

Val Loss: 3.8420 - Val Accuracy: 0.9242



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:01<02:36,  1.05s/it][A
Training:   3%|▎         | 4/149 [00:01<00:33,  4.33it/s][A
Training:   4%|▍         | 6/149 [00:01<00:33,  4.23it/s][A
Training:   5%|▌         | 8/149 [00:01<00:23,  6.01it/s][A
Training:   7%|▋         | 10/149 [00:02<00:26,  5.16it/s][A
Training:   8%|▊         | 12/149 [00:02<00:20,  6.84it/s][A
Training:   9%|▉         | 14/149 [00:02<00:24,  5.43it/s][A
Training:  11%|█         | 16/149 [00:03<00:19,  6.93it/s][A
Training:  12%|█▏        | 18/149 [00:03<00:25,  5.09it/s][A
Training:  13%|█▎        | 20/149 [00:03<00:20,  6.37it/s][A
Training:  15%|█▍        | 22/149 [00:04<00:23,  5.29it/s][A
Training:  17%|█▋        | 25/149 [00:04<00:21,  5.88it/s][A
Training:  18%|█▊        | 27/149 [00:04<00:16,  7.18it/s][A
Training:  19%|█▉        | 29/149 [00:05<00:20,  5.98it/s][A
Training:  21%|██▏       | 32/149 [00:05<00:14,  8.35it/s][A
Training:  23%|██▎  

Epoch: 43/59 - Loss: 3.0898 - Accuracy: 0.9398



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:00<00:17,  1.01it/s][A
 21%|██        | 4/19 [00:01<00:03,  4.60it/s][A
 32%|███▏      | 6/19 [00:01<00:02,  4.55it/s][A
 47%|████▋     | 9/19 [00:02<00:02,  4.88it/s][A
 68%|██████▊   | 13/19 [00:02<00:01,  5.89it/s][A
 84%|████████▍ | 16/19 [00:02<00:00,  8.01it/s][A
100%|██████████| 19/19 [00:03<00:00,  5.61it/s][A
Epochs:  73%|███████▎  | 43/59 [20:39<07:22, 27.68s/it]

Val Loss: 3.9215 - Val Accuracy: 0.9280



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:00<02:18,  1.07it/s][A
Training:   1%|▏         | 2/149 [00:01<01:09,  2.10it/s][A
Training:   3%|▎         | 5/149 [00:01<00:36,  3.98it/s][A
Training:   5%|▍         | 7/149 [00:01<00:25,  5.58it/s][A
Training:   6%|▌         | 9/149 [00:02<00:25,  5.42it/s][A
Training:   7%|▋         | 11/149 [00:02<00:21,  6.33it/s][A
Training:   9%|▊         | 13/149 [00:02<00:21,  6.37it/s][A
Training:   9%|▉         | 14/149 [00:02<00:20,  6.58it/s][A
Training:  10%|█         | 15/149 [00:02<00:20,  6.66it/s][A
Training:  11%|█▏        | 17/149 [00:03<00:20,  6.45it/s][A
Training:  12%|█▏        | 18/149 [00:03<00:20,  6.43it/s][A
Training:  13%|█▎        | 19/149 [00:03<00:19,  6.63it/s][A
Training:  14%|█▍        | 21/149 [00:03<00:23,  5.53it/s][A
Training:  15%|█▌        | 23/149 [00:04<00:21,  5.89it/s][A
Training:  17%|█▋        | 25/149 [00:04<00:21,  5.68it/s][A
Training:  18%|█▊    

Epoch: 44/59 - Loss: 3.2160 - Accuracy: 0.9362



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:00<00:16,  1.08it/s][A
 16%|█▌        | 3/19 [00:01<00:04,  3.59it/s][A
 26%|██▋       | 5/19 [00:01<00:03,  4.14it/s][A
 37%|███▋      | 7/19 [00:01<00:02,  5.73it/s][A
 47%|████▋     | 9/19 [00:02<00:02,  4.99it/s][A
 63%|██████▎   | 12/19 [00:02<00:00,  7.76it/s][A
 74%|███████▎  | 14/19 [00:02<00:00,  5.70it/s][A
100%|██████████| 19/19 [00:03<00:00,  5.46it/s][A
Epochs:  75%|███████▍  | 44/59 [21:05<06:47, 27.19s/it]

Val Loss: 4.0412 - Val Accuracy: 0.9277



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:01<02:28,  1.00s/it][A
Training:   3%|▎         | 5/149 [00:01<00:40,  3.57it/s][A
Training:   5%|▍         | 7/149 [00:01<00:27,  5.17it/s][A
Training:   6%|▌         | 9/149 [00:02<00:32,  4.33it/s][A
Training:   9%|▊         | 13/149 [00:03<00:27,  4.87it/s][A
Training:  11%|█         | 16/149 [00:03<00:19,  6.87it/s][A
Training:  12%|█▏        | 18/149 [00:03<00:22,  5.81it/s][A
Training:  14%|█▍        | 21/149 [00:04<00:22,  5.63it/s][A
Training:  15%|█▌        | 23/149 [00:04<00:19,  6.48it/s][A
Training:  17%|█▋        | 25/149 [00:04<00:22,  5.63it/s][A
Training:  19%|█▉        | 29/149 [00:05<00:21,  5.61it/s][A
Training:  21%|██▏       | 32/149 [00:05<00:15,  7.47it/s][A
Training:  23%|██▎       | 34/149 [00:06<00:17,  6.39it/s][A
Training:  25%|██▍       | 37/149 [00:06<00:19,  5.71it/s][A
Training:  26%|██▌       | 39/149 [00:06<00:16,  6.84it/s][A
Training:  28%|██▊  

Epoch: 45/59 - Loss: 3.1481 - Accuracy: 0.9389



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:00<00:17,  1.01it/s][A
 16%|█▌        | 3/19 [00:01<00:04,  3.37it/s][A
 26%|██▋       | 5/19 [00:01<00:03,  3.58it/s][A
 37%|███▋      | 7/19 [00:01<00:02,  5.19it/s][A
 47%|████▋     | 9/19 [00:02<00:01,  5.12it/s][A
 53%|█████▎    | 10/19 [00:02<00:01,  5.66it/s][A
 58%|█████▊    | 11/19 [00:02<00:01,  5.20it/s][A
 68%|██████▊   | 13/19 [00:02<00:01,  5.40it/s][A
 79%|███████▉  | 15/19 [00:03<00:00,  6.17it/s][A
100%|██████████| 19/19 [00:03<00:00,  5.41it/s][A
Epochs:  76%|███████▋  | 45/59 [21:31<06:14, 26.78s/it]

Val Loss: 3.9391 - Val Accuracy: 0.9295



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:00<02:21,  1.04it/s][A
Training:   1%|▏         | 2/149 [00:01<01:06,  2.20it/s][A
Training:   3%|▎         | 5/149 [00:01<00:36,  3.93it/s][A
Training:   5%|▍         | 7/149 [00:01<00:24,  5.72it/s][A
Training:   6%|▌         | 9/149 [00:02<00:24,  5.62it/s][A
Training:   7%|▋         | 11/149 [00:02<00:19,  7.13it/s][A
Training:   9%|▊         | 13/149 [00:02<00:19,  7.00it/s][A
Training:   9%|▉         | 14/149 [00:02<00:19,  6.80it/s][A
Training:  11%|█         | 16/149 [00:02<00:15,  8.61it/s][A
Training:  12%|█▏        | 18/149 [00:03<00:18,  7.25it/s][A
Training:  13%|█▎        | 20/149 [00:03<00:16,  7.84it/s][A
Training:  15%|█▍        | 22/149 [00:03<00:18,  6.87it/s][A
Training:  16%|█▌        | 24/149 [00:03<00:15,  8.30it/s][A
Training:  17%|█▋        | 26/149 [00:04<00:18,  6.83it/s][A
Training:  19%|█▉        | 28/149 [00:04<00:14,  8.13it/s][A
Training:  20%|██    

Epoch: 46/59 - Loss: 3.1493 - Accuracy: 0.9391



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:00<00:14,  1.21it/s][A
 26%|██▋       | 5/19 [00:01<00:03,  4.12it/s][A
 47%|████▋     | 9/19 [00:01<00:01,  5.77it/s][A
 68%|██████▊   | 13/19 [00:02<00:00,  6.87it/s][A
100%|██████████| 19/19 [00:02<00:00,  6.85it/s][A
Epochs:  78%|███████▊  | 46/59 [21:55<05:37, 25.94s/it]

Val Loss: 3.8404 - Val Accuracy: 0.9306



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:00<01:48,  1.36it/s][A
Training:   3%|▎         | 5/149 [00:01<00:30,  4.69it/s][A
Training:   6%|▌         | 9/149 [00:01<00:23,  5.94it/s][A
Training:   8%|▊         | 12/149 [00:01<00:16,  8.28it/s][A
Training:   9%|▉         | 14/149 [00:02<00:18,  7.16it/s][A
Training:  11%|█▏        | 17/149 [00:02<00:17,  7.71it/s][A
Training:  13%|█▎        | 20/149 [00:02<00:12, 10.00it/s][A
Training:  15%|█▍        | 22/149 [00:03<00:16,  7.91it/s][A
Training:  17%|█▋        | 25/149 [00:03<00:15,  7.98it/s][A
Training:  18%|█▊        | 27/149 [00:03<00:13,  9.24it/s][A
Training:  19%|█▉        | 29/149 [00:03<00:15,  7.74it/s][A
Training:  21%|██▏       | 32/149 [00:04<00:11, 10.29it/s][A
Training:  23%|██▎       | 34/149 [00:04<00:14,  8.17it/s][A
Training:  25%|██▍       | 37/149 [00:04<00:15,  7.35it/s][A
Training:  28%|██▊       | 41/149 [00:05<00:13,  8.20it/s][A
Training:  30%|███ 

Epoch: 47/59 - Loss: 3.0541 - Accuracy: 0.9399



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:00<00:13,  1.29it/s][A
 26%|██▋       | 5/19 [00:01<00:02,  4.91it/s][A
 47%|████▋     | 9/19 [00:01<00:01,  6.30it/s][A
 68%|██████▊   | 13/19 [00:02<00:00,  7.17it/s][A
100%|██████████| 19/19 [00:02<00:00,  6.96it/s][A
Epochs:  80%|███████▉  | 47/59 [22:16<04:52, 24.40s/it]

Val Loss: 3.8957 - Val Accuracy: 0.9252



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:00<01:54,  1.29it/s][A
Training:   3%|▎         | 5/149 [00:01<00:29,  4.93it/s][A
Training:   6%|▌         | 9/149 [00:01<00:20,  6.81it/s][A
Training:   7%|▋         | 11/149 [00:01<00:17,  7.93it/s][A
Training:   9%|▊         | 13/149 [00:02<00:19,  6.84it/s][A
Training:  10%|█         | 15/149 [00:02<00:15,  8.39it/s][A
Training:  11%|█▏        | 17/149 [00:02<00:19,  6.95it/s][A
Training:  14%|█▍        | 21/149 [00:03<00:17,  7.36it/s][A
Training:  17%|█▋        | 25/149 [00:03<00:14,  8.29it/s][A
Training:  19%|█▉        | 29/149 [00:04<00:14,  8.18it/s][A
Training:  22%|██▏       | 33/149 [00:04<00:14,  8.09it/s][A
Training:  25%|██▍       | 37/149 [00:05<00:13,  8.32it/s][A
Training:  27%|██▋       | 40/149 [00:05<00:11,  9.90it/s][A
Training:  28%|██▊       | 42/149 [00:05<00:11,  9.25it/s][A
Training:  30%|███       | 45/149 [00:05<00:11,  8.76it/s][A
Training:  33%|███▎

Epoch: 48/59 - Loss: 3.1911 - Accuracy: 0.9392



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:00<00:12,  1.40it/s][A
 16%|█▌        | 3/19 [00:00<00:03,  4.49it/s][A
 26%|██▋       | 5/19 [00:01<00:02,  4.73it/s][A
 47%|████▋     | 9/19 [00:01<00:01,  6.18it/s][A
 63%|██████▎   | 12/19 [00:01<00:00,  8.93it/s][A
 74%|███████▎  | 14/19 [00:02<00:00,  7.09it/s][A
100%|██████████| 19/19 [00:02<00:00,  6.80it/s][A
Epochs:  81%|████████▏ | 48/59 [22:36<04:15, 23.19s/it]

Val Loss: 3.8542 - Val Accuracy: 0.9297



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:00<02:04,  1.19it/s][A
Training:   3%|▎         | 4/149 [00:00<00:27,  5.36it/s][A
Training:   4%|▍         | 6/149 [00:01<00:24,  5.82it/s][A
Training:   6%|▌         | 9/149 [00:01<00:24,  5.82it/s][A
Training:   9%|▊         | 13/149 [00:02<00:18,  7.19it/s][A
Training:  11%|█▏        | 17/149 [00:02<00:16,  7.83it/s][A
Training:  13%|█▎        | 20/149 [00:02<00:13,  9.86it/s][A
Training:  15%|█▍        | 22/149 [00:03<00:15,  8.36it/s][A
Training:  17%|█▋        | 25/149 [00:03<00:15,  7.84it/s][A
Training:  19%|█▉        | 29/149 [00:04<00:15,  7.63it/s][A
Training:  21%|██▏       | 32/149 [00:04<00:12,  9.52it/s][A
Training:  23%|██▎       | 34/149 [00:04<00:13,  8.32it/s][A
Training:  25%|██▍       | 37/149 [00:05<00:14,  7.79it/s][A
Training:  28%|██▊       | 41/149 [00:05<00:12,  8.52it/s][A
Training:  30%|███       | 45/149 [00:05<00:12,  8.59it/s][A
Training:  32%|███▏ 

Epoch: 49/59 - Loss: 3.0088 - Accuracy: 0.9416



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:00<00:14,  1.28it/s][A
 21%|██        | 4/19 [00:00<00:02,  5.69it/s][A
 32%|███▏      | 6/19 [00:01<00:02,  5.92it/s][A
 42%|████▏     | 8/19 [00:01<00:01,  7.68it/s][A
 53%|█████▎    | 10/19 [00:01<00:01,  6.76it/s][A
 63%|██████▎   | 12/19 [00:01<00:00,  8.25it/s][A
 74%|███████▎  | 14/19 [00:02<00:00,  7.06it/s][A
 79%|███████▉  | 15/19 [00:02<00:00,  7.30it/s][A
100%|██████████| 19/19 [00:02<00:00,  6.77it/s][A
Epochs:  83%|████████▎ | 49/59 [22:57<03:44, 22.48s/it]

Val Loss: 3.8835 - Val Accuracy: 0.9298



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:00<02:01,  1.22it/s][A
Training:   3%|▎         | 5/149 [00:01<00:31,  4.61it/s][A
Training:   6%|▌         | 9/149 [00:01<00:22,  6.16it/s][A
Training:   8%|▊         | 12/149 [00:01<00:15,  8.57it/s][A
Training:   9%|▉         | 14/149 [00:02<00:19,  7.01it/s][A
Training:  11%|█▏        | 17/149 [00:02<00:18,  7.04it/s][A
Training:  14%|█▍        | 21/149 [00:03<00:16,  7.98it/s][A
Training:  17%|█▋        | 25/149 [00:03<00:14,  8.76it/s][A
Training:  19%|█▉        | 29/149 [00:03<00:12,  9.25it/s][A
Training:  22%|██▏       | 33/149 [00:04<00:12,  9.19it/s][A
Training:  25%|██▍       | 37/149 [00:04<00:12,  9.06it/s][A
Training:  27%|██▋       | 40/149 [00:04<00:10, 10.87it/s][A
Training:  28%|██▊       | 42/149 [00:05<00:12,  8.60it/s][A
Training:  30%|███       | 45/149 [00:05<00:12,  8.00it/s][A
Training:  32%|███▏      | 48/149 [00:05<00:10, 10.08it/s][A
Training:  34%|███▎

Epoch: 50/59 - Loss: 3.0188 - Accuracy: 0.9394



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:00<00:14,  1.27it/s][A
 21%|██        | 4/19 [00:00<00:02,  5.63it/s][A
 32%|███▏      | 6/19 [00:01<00:02,  5.05it/s][A
 47%|████▋     | 9/19 [00:01<00:01,  5.86it/s][A
 68%|██████▊   | 13/19 [00:02<00:00,  6.68it/s][A
100%|██████████| 19/19 [00:02<00:00,  6.78it/s][A
Epochs:  85%|████████▍ | 50/59 [23:17<03:17, 21.90s/it]

Val Loss: 3.8142 - Val Accuracy: 0.9297



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:00<01:51,  1.33it/s][A
Training:   3%|▎         | 4/149 [00:00<00:25,  5.59it/s][A
Training:   4%|▍         | 6/149 [00:01<00:26,  5.46it/s][A
Training:   6%|▌         | 9/149 [00:01<00:22,  6.10it/s][A
Training:   9%|▊         | 13/149 [00:02<00:19,  6.99it/s][A
Training:  11%|█         | 16/149 [00:02<00:14,  9.43it/s][A
Training:  12%|█▏        | 18/149 [00:02<00:19,  6.56it/s][A
Training:  13%|█▎        | 20/149 [00:03<00:16,  7.66it/s][A
Training:  15%|█▍        | 22/149 [00:03<00:18,  6.91it/s][A
Training:  17%|█▋        | 25/149 [00:03<00:17,  7.24it/s][A
Training:  19%|█▉        | 28/149 [00:03<00:12,  9.74it/s][A
Training:  20%|██        | 30/149 [00:04<00:15,  7.75it/s][A
Training:  22%|██▏       | 33/149 [00:04<00:15,  7.32it/s][A
Training:  25%|██▍       | 37/149 [00:05<00:14,  7.72it/s][A
Training:  28%|██▊       | 41/149 [00:05<00:12,  8.36it/s][A
Training:  30%|███  

Epoch: 51/59 - Loss: 3.0937 - Accuracy: 0.9395



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:00<00:15,  1.13it/s][A
 21%|██        | 4/19 [00:01<00:03,  4.92it/s][A
 32%|███▏      | 6/19 [00:01<00:02,  5.32it/s][A
 47%|████▋     | 9/19 [00:01<00:01,  5.74it/s][A
 68%|██████▊   | 13/19 [00:02<00:00,  7.28it/s][A
100%|██████████| 19/19 [00:02<00:00,  6.89it/s][A
Epochs:  86%|████████▋ | 51/59 [23:38<02:53, 21.66s/it]

Val Loss: 3.9496 - Val Accuracy: 0.9280



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:00<01:59,  1.24it/s][A
Training:   3%|▎         | 4/149 [00:00<00:26,  5.51it/s][A
Training:   4%|▍         | 6/149 [00:01<00:27,  5.22it/s][A
Training:   6%|▌         | 9/149 [00:01<00:22,  6.30it/s][A
Training:   9%|▊         | 13/149 [00:02<00:17,  7.57it/s][A
Training:  11%|█         | 16/149 [00:02<00:13,  9.57it/s][A
Training:  12%|█▏        | 18/149 [00:02<00:16,  7.99it/s][A
Training:  13%|█▎        | 20/149 [00:02<00:14,  9.12it/s][A
Training:  15%|█▍        | 22/149 [00:03<00:16,  7.73it/s][A
Training:  16%|█▌        | 24/149 [00:03<00:14,  8.44it/s][A
Training:  17%|█▋        | 26/149 [00:03<00:16,  7.35it/s][A
Training:  19%|█▉        | 28/149 [00:03<00:14,  8.28it/s][A
Training:  19%|█▉        | 29/149 [00:04<00:16,  7.45it/s][A
Training:  21%|██▏       | 32/149 [00:04<00:12,  9.13it/s][A
Training:  22%|██▏       | 33/149 [00:04<00:15,  7.59it/s][A
Training:  24%|██▍  

Epoch: 52/59 - Loss: 3.1225 - Accuracy: 0.9370



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:00<00:13,  1.33it/s][A
 21%|██        | 4/19 [00:00<00:02,  5.65it/s][A
 32%|███▏      | 6/19 [00:01<00:02,  5.39it/s][A
 42%|████▏     | 8/19 [00:01<00:01,  7.38it/s][A
 53%|█████▎    | 10/19 [00:01<00:01,  6.59it/s][A
 63%|██████▎   | 12/19 [00:01<00:00,  8.14it/s][A
 74%|███████▎  | 14/19 [00:02<00:00,  7.07it/s][A
 84%|████████▍ | 16/19 [00:02<00:00,  8.83it/s][A
100%|██████████| 19/19 [00:02<00:00,  6.98it/s][A
Epochs:  88%|████████▊ | 52/59 [23:59<02:29, 21.42s/it]

Val Loss: 3.8658 - Val Accuracy: 0.9297



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:00<01:46,  1.39it/s][A
Training:   3%|▎         | 5/149 [00:01<00:29,  4.92it/s][A
Training:   5%|▌         | 8/149 [00:01<00:18,  7.68it/s][A
Training:   7%|▋         | 10/149 [00:01<00:20,  6.77it/s][A
Training:   9%|▊         | 13/149 [00:02<00:19,  7.13it/s][A
Training:  11%|█         | 16/149 [00:02<00:13,  9.51it/s][A
Training:  12%|█▏        | 18/149 [00:02<00:16,  7.90it/s][A
Training:  14%|█▍        | 21/149 [00:02<00:16,  7.74it/s][A
Training:  15%|█▌        | 23/149 [00:03<00:15,  8.33it/s][A
Training:  17%|█▋        | 25/149 [00:03<00:16,  7.48it/s][A
Training:  18%|█▊        | 27/149 [00:03<00:15,  8.08it/s][A
Training:  19%|█▉        | 29/149 [00:04<00:15,  7.77it/s][A
Training:  21%|██        | 31/149 [00:04<00:13,  8.71it/s][A
Training:  22%|██▏       | 33/149 [00:04<00:14,  7.75it/s][A
Training:  23%|██▎       | 35/149 [00:04<00:12,  9.20it/s][A
Training:  25%|██▍ 

Epoch: 53/59 - Loss: 3.0174 - Accuracy: 0.9390



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:00<00:13,  1.33it/s][A
 11%|█         | 2/19 [00:00<00:07,  2.39it/s][A
 26%|██▋       | 5/19 [00:01<00:02,  4.96it/s][A
 37%|███▋      | 7/19 [00:01<00:01,  7.03it/s][A
 47%|████▋     | 9/19 [00:01<00:01,  6.76it/s][A
 53%|█████▎    | 10/19 [00:01<00:01,  6.97it/s][A
 68%|██████▊   | 13/19 [00:02<00:00,  6.74it/s][A
 84%|████████▍ | 16/19 [00:02<00:00,  9.62it/s][A
100%|██████████| 19/19 [00:03<00:00,  6.33it/s][A
Epochs:  90%|████████▉ | 53/59 [24:20<02:07, 21.29s/it]

Val Loss: 3.8703 - Val Accuracy: 0.9301



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:00<01:58,  1.25it/s][A
Training:   3%|▎         | 5/149 [00:01<00:31,  4.57it/s][A
Training:   6%|▌         | 9/149 [00:01<00:22,  6.33it/s][A
Training:   9%|▊         | 13/149 [00:02<00:18,  7.30it/s][A
Training:  11%|█         | 16/149 [00:02<00:14,  9.47it/s][A
Training:  12%|█▏        | 18/149 [00:02<00:16,  7.99it/s][A
Training:  13%|█▎        | 20/149 [00:02<00:13,  9.34it/s][A
Training:  15%|█▍        | 22/149 [00:03<00:16,  7.80it/s][A
Training:  17%|█▋        | 25/149 [00:03<00:16,  7.50it/s][A
Training:  19%|█▉        | 29/149 [00:03<00:13,  8.76it/s][A
Training:  22%|██▏       | 33/149 [00:04<00:12,  9.17it/s][A
Training:  25%|██▍       | 37/149 [00:04<00:11,  9.37it/s][A
Training:  27%|██▋       | 40/149 [00:04<00:09, 11.37it/s][A
Training:  28%|██▊       | 42/149 [00:05<00:12,  8.77it/s][A
Training:  30%|███       | 45/149 [00:05<00:12,  8.04it/s][A
Training:  33%|███▎

Epoch: 54/59 - Loss: 3.1772 - Accuracy: 0.9382



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:00<00:12,  1.44it/s][A
 26%|██▋       | 5/19 [00:01<00:02,  4.72it/s][A
 47%|████▋     | 9/19 [00:01<00:01,  6.01it/s][A
 68%|██████▊   | 13/19 [00:02<00:00,  6.91it/s][A
 84%|████████▍ | 16/19 [00:02<00:00,  9.16it/s][A
100%|██████████| 19/19 [00:02<00:00,  7.03it/s][A
Epochs:  92%|█████████▏| 54/59 [24:40<01:44, 20.91s/it]

Val Loss: 3.9779 - Val Accuracy: 0.9275



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:00<01:45,  1.40it/s][A
Training:   3%|▎         | 5/149 [00:01<00:30,  4.79it/s][A
Training:   6%|▌         | 9/149 [00:01<00:23,  5.86it/s][A
Training:   8%|▊         | 12/149 [00:01<00:16,  8.24it/s][A
Training:   9%|▉         | 14/149 [00:02<00:17,  7.63it/s][A
Training:  11%|█▏        | 17/149 [00:02<00:18,  7.13it/s][A
Training:  13%|█▎        | 20/149 [00:02<00:13,  9.55it/s][A
Training:  15%|█▍        | 22/149 [00:03<00:16,  7.84it/s][A
Training:  17%|█▋        | 25/149 [00:03<00:15,  7.80it/s][A
Training:  19%|█▉        | 29/149 [00:04<00:15,  7.99it/s][A
Training:  21%|██▏       | 32/149 [00:04<00:11, 10.11it/s][A
Training:  23%|██▎       | 34/149 [00:04<00:13,  8.25it/s][A
Training:  25%|██▍       | 37/149 [00:04<00:14,  7.63it/s][A
Training:  28%|██▊       | 41/149 [00:05<00:14,  7.63it/s][A
Training:  30%|███       | 45/149 [00:05<00:12,  8.50it/s][A
Training:  32%|███▏

Epoch: 55/59 - Loss: 3.0486 - Accuracy: 0.9408



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:00<00:14,  1.25it/s][A
 26%|██▋       | 5/19 [00:01<00:02,  4.88it/s][A
 42%|████▏     | 8/19 [00:01<00:01,  8.08it/s][A
 53%|█████▎    | 10/19 [00:01<00:01,  7.14it/s][A
 63%|██████▎   | 12/19 [00:01<00:00,  8.90it/s][A
 74%|███████▎  | 14/19 [00:02<00:00,  7.56it/s][A
100%|██████████| 19/19 [00:02<00:00,  7.23it/s][A
Epochs:  93%|█████████▎| 55/59 [25:01<01:23, 20.85s/it]

Val Loss: 3.8413 - Val Accuracy: 0.9325



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:00<02:04,  1.19it/s][A
Training:   3%|▎         | 5/149 [00:01<00:31,  4.52it/s][A
Training:   6%|▌         | 9/149 [00:01<00:21,  6.43it/s][A
Training:   9%|▊         | 13/149 [00:02<00:19,  7.07it/s][A
Training:  11%|█▏        | 17/149 [00:02<00:17,  7.36it/s][A
Training:  13%|█▎        | 19/149 [00:02<00:15,  8.30it/s][A
Training:  14%|█▍        | 21/149 [00:03<00:16,  7.65it/s][A
Training:  16%|█▌        | 24/149 [00:03<00:12, 10.15it/s][A
Training:  17%|█▋        | 26/149 [00:03<00:15,  8.14it/s][A
Training:  19%|█▉        | 29/149 [00:04<00:15,  7.53it/s][A
Training:  22%|██▏       | 33/149 [00:04<00:15,  7.33it/s][A
Training:  24%|██▍       | 36/149 [00:04<00:11,  9.42it/s][A
Training:  26%|██▌       | 38/149 [00:05<00:13,  8.19it/s][A
Training:  28%|██▊       | 41/149 [00:05<00:14,  7.55it/s][A
Training:  30%|███       | 45/149 [00:06<00:12,  8.18it/s][A
Training:  33%|███▎

Epoch: 56/59 - Loss: 3.0711 - Accuracy: 0.9396



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:00<00:14,  1.24it/s][A
 26%|██▋       | 5/19 [00:01<00:03,  4.51it/s][A
 47%|████▋     | 9/19 [00:01<00:01,  6.19it/s][A
 68%|██████▊   | 13/19 [00:02<00:00,  7.00it/s][A
100%|██████████| 19/19 [00:02<00:00,  6.96it/s][A
Epochs:  95%|█████████▍| 56/59 [25:22<01:02, 20.81s/it]

Val Loss: 4.0466 - Val Accuracy: 0.9214



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:00<01:59,  1.24it/s][A
Training:   3%|▎         | 4/149 [00:00<00:26,  5.54it/s][A
Training:   4%|▍         | 6/149 [00:01<00:26,  5.31it/s][A
Training:   5%|▌         | 8/149 [00:01<00:19,  7.28it/s][A
Training:   7%|▋         | 10/149 [00:01<00:23,  6.03it/s][A
Training:   9%|▊         | 13/149 [00:02<00:20,  6.58it/s][A
Training:  11%|█▏        | 17/149 [00:02<00:17,  7.71it/s][A
Training:  13%|█▎        | 20/149 [00:02<00:13,  9.84it/s][A
Training:  15%|█▍        | 22/149 [00:03<00:15,  8.11it/s][A
Training:  17%|█▋        | 25/149 [00:03<00:17,  7.17it/s][A
Training:  19%|█▉        | 28/149 [00:03<00:12,  9.46it/s][A
Training:  20%|██        | 30/149 [00:04<00:15,  7.59it/s][A
Training:  22%|██▏       | 33/149 [00:04<00:15,  7.41it/s][A
Training:  25%|██▍       | 37/149 [00:05<00:13,  8.32it/s][A
Training:  27%|██▋       | 40/149 [00:05<00:10, 10.42it/s][A
Training:  28%|██▊  

Epoch: 57/59 - Loss: 3.0553 - Accuracy: 0.9402



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:00<00:15,  1.19it/s][A
 26%|██▋       | 5/19 [00:01<00:02,  4.72it/s][A
 47%|████▋     | 9/19 [00:01<00:01,  6.22it/s][A
 68%|██████▊   | 13/19 [00:02<00:00,  6.92it/s][A
100%|██████████| 19/19 [00:02<00:00,  6.80it/s][A
Epochs:  97%|█████████▋| 57/59 [25:42<00:41, 20.74s/it]

Val Loss: 3.8768 - Val Accuracy: 0.9246



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:00<01:45,  1.41it/s][A
Training:   2%|▏         | 3/149 [00:00<00:33,  4.34it/s][A
Training:   3%|▎         | 5/149 [00:01<00:30,  4.75it/s][A
Training:   5%|▍         | 7/149 [00:01<00:20,  6.99it/s][A
Training:   6%|▌         | 9/149 [00:01<00:23,  6.07it/s][A
Training:   7%|▋         | 11/149 [00:01<00:17,  7.77it/s][A
Training:   9%|▊         | 13/149 [00:02<00:19,  7.03it/s][A
Training:  10%|█         | 15/149 [00:02<00:16,  8.27it/s][A
Training:  11%|█▏        | 17/149 [00:02<00:18,  7.29it/s][A
Training:  13%|█▎        | 19/149 [00:02<00:15,  8.26it/s][A
Training:  14%|█▍        | 21/149 [00:03<00:17,  7.48it/s][A
Training:  17%|█▋        | 25/149 [00:03<00:14,  8.65it/s][A
Training:  19%|█▉        | 29/149 [00:04<00:13,  8.77it/s][A
Training:  22%|██▏       | 33/149 [00:04<00:12,  9.24it/s][A
Training:  25%|██▍       | 37/149 [00:04<00:12,  8.75it/s][A
Training:  28%|██▊   

Epoch: 58/59 - Loss: 2.9565 - Accuracy: 0.9413



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:00<00:13,  1.30it/s][A
 16%|█▌        | 3/19 [00:00<00:03,  4.20it/s][A
 26%|██▋       | 5/19 [00:01<00:02,  4.95it/s][A
 32%|███▏      | 6/19 [00:01<00:02,  5.00it/s][A
 47%|████▋     | 9/19 [00:01<00:01,  7.20it/s][A
 53%|█████▎    | 10/19 [00:01<00:01,  7.02it/s][A
 68%|██████▊   | 13/19 [00:02<00:00,  8.62it/s][A
 74%|███████▎  | 14/19 [00:02<00:00,  8.25it/s][A
 89%|████████▉ | 17/19 [00:02<00:00,  8.69it/s][A
100%|██████████| 19/19 [00:02<00:00,  6.78it/s][A
Epochs:  98%|█████████▊| 58/59 [26:03<00:20, 20.57s/it]

Val Loss: 3.8641 - Val Accuracy: 0.9263



Training:   0%|          | 0/149 [00:00<?, ?it/s][A
Training:   1%|          | 1/149 [00:00<02:05,  1.18it/s][A
Training:   3%|▎         | 5/149 [00:01<00:32,  4.49it/s][A
Training:   6%|▌         | 9/149 [00:01<00:22,  6.23it/s][A
Training:   9%|▊         | 13/149 [00:02<00:18,  7.27it/s][A
Training:  11%|█▏        | 17/149 [00:02<00:16,  7.79it/s][A
Training:  14%|█▍        | 21/149 [00:03<00:17,  7.53it/s][A
Training:  17%|█▋        | 25/149 [00:03<00:15,  8.12it/s][A
Training:  19%|█▉        | 28/149 [00:03<00:12,  9.83it/s][A
Training:  20%|██        | 30/149 [00:04<00:14,  8.08it/s][A
Training:  22%|██▏       | 33/149 [00:04<00:14,  7.86it/s][A
Training:  25%|██▍       | 37/149 [00:05<00:14,  7.87it/s][A
Training:  26%|██▌       | 39/149 [00:05<00:12,  8.99it/s][A
Training:  28%|██▊       | 41/149 [00:05<00:14,  7.21it/s][A
Training:  30%|███       | 45/149 [00:06<00:13,  7.56it/s][A
Training:  33%|███▎      | 49/149 [00:06<00:12,  7.72it/s][A
Training:  36%|███▌

Epoch: 59/59 - Loss: 2.9669 - Accuracy: 0.9402



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:00<00:12,  1.41it/s][A
 11%|█         | 2/19 [00:00<00:06,  2.82it/s][A
 26%|██▋       | 5/19 [00:01<00:02,  5.05it/s][A
 47%|████▋     | 9/19 [00:01<00:01,  6.50it/s][A
 63%|██████▎   | 12/19 [00:01<00:00,  8.70it/s][A
 74%|███████▎  | 14/19 [00:02<00:00,  7.68it/s][A
100%|██████████| 19/19 [00:02<00:00,  7.11it/s][A
Epochs: 100%|██████████| 59/59 [26:24<00:00, 26.85s/it]


Val Loss: 3.8980 - Val Accuracy: 0.9252
Saving best model...


[32m[I 2023-12-14 11:33:02,158][0m Trial 17 finished with value: 0.9251726865768433 and parameters: {'loss_learning_rate': 0.0033107389307156145, 'learning_rate': 0.0003838612275375301, 'weight_decay': 0.0033012134894320976, 'epsilon': 3.39228871048199e-09, 'batch_size': 94, 'epochs': 59}. Best is trial 17 with value: 0.9251726865768433.[0m


Learning rate for Loss: 0.00295946123694127
Learning rate: 0.00023228095383764677
Weight decay: 0.00883375036543313
Epsilon: 3.5618045490627407e-09
Batch size: 87
Number of epochs: 61


Epochs:   0%|          | 0/61 [00:00<?, ?it/s]
Training:   0%|          | 0/161 [00:00<?, ?it/s][A
Training:   1%|          | 1/161 [00:00<01:58,  1.35it/s][A
Training:   2%|▏         | 4/161 [00:00<00:27,  5.64it/s][A
Training:   4%|▎         | 6/161 [00:01<00:30,  5.17it/s][A
Training:   6%|▌         | 9/161 [00:01<00:25,  5.99it/s][A
Training:   7%|▋         | 11/161 [00:01<00:19,  7.65it/s][A
Training:   8%|▊         | 13/161 [00:02<00:21,  6.97it/s][A
Training:  10%|▉         | 16/161 [00:02<00:14,  9.94it/s][A
Training:  11%|█         | 18/161 [00:02<00:17,  8.31it/s][A
Training:  13%|█▎        | 21/161 [00:03<00:17,  8.00it/s][A
Training:  15%|█▍        | 24/161 [00:03<00:12, 10.64it/s][A
Training:  16%|█▌        | 26/161 [00:03<00:14,  9.03it/s][A
Training:  18%|█▊        | 29/161 [00:03<00:15,  8.34it/s][A
Training:  20%|██        | 33/161 [00:04<00:14,  9.01it/s][A
Training:  22%|██▏       | 36/161 [00:04<00:11, 11.31it/s][A
Training:  24%|██▎       | 38/161 [0

Epoch: 1/61 - Loss: 10.4988 - Accuracy: 0.8038



  0%|          | 0/21 [00:00<?, ?it/s][A
  5%|▍         | 1/21 [00:00<00:14,  1.40it/s][A
 24%|██▍       | 5/21 [00:01<00:03,  5.08it/s][A
 43%|████▎     | 9/21 [00:01<00:01,  6.53it/s][A
 62%|██████▏   | 13/21 [00:02<00:01,  7.52it/s][A
 67%|██████▋   | 14/21 [00:02<00:00,  7.77it/s][A
 81%|████████  | 17/21 [00:02<00:00,  8.33it/s][A
100%|██████████| 21/21 [00:02<00:00,  7.66it/s][A
Epochs:   0%|          | 0/61 [00:21<?, ?it/s]
[32m[I 2023-12-14 11:33:23,669][0m Trial 18 pruned. [0m


Val Loss: 8.7323 - Val Accuracy: 0.8336
Learning rate for Loss: 0.00919567288602305
Learning rate: 6.346639482141987e-05
Weight decay: 0.003551007288934496
Epsilon: 7.051683125684268e-09
Batch size: 95
Number of epochs: 87


Epochs:   0%|          | 0/87 [00:00<?, ?it/s]
Training:   0%|          | 0/148 [00:00<?, ?it/s][A
Training:   1%|          | 1/148 [00:00<01:51,  1.32it/s][A
Training:   3%|▎         | 4/148 [00:00<00:24,  5.77it/s][A
Training:   4%|▍         | 6/148 [00:01<00:26,  5.37it/s][A
Training:   6%|▌         | 9/148 [00:01<00:24,  5.76it/s][A
Training:   7%|▋         | 10/148 [00:01<00:22,  6.08it/s][A
Training:   9%|▉         | 13/148 [00:02<00:19,  6.96it/s][A
Training:   9%|▉         | 14/148 [00:02<00:18,  7.10it/s][A
Training:  11%|█▏        | 17/148 [00:02<00:15,  8.25it/s][A
Training:  12%|█▏        | 18/148 [00:02<00:16,  7.98it/s][A
Training:  14%|█▍        | 21/148 [00:03<00:14,  8.48it/s][A
Training:  15%|█▍        | 22/148 [00:03<00:15,  7.98it/s][A
Training:  17%|█▋        | 25/148 [00:03<00:13,  9.09it/s][A
Training:  18%|█▊        | 26/148 [00:03<00:15,  7.75it/s][A
Training:  20%|█▉        | 29/148 [00:04<00:12,  9.26it/s][A
Training:  20%|██        | 30/148 [0

Epoch: 1/87 - Loss: 15.1554 - Accuracy: 0.6500



  0%|          | 0/19 [00:00<?, ?it/s][A
  5%|▌         | 1/19 [00:00<00:16,  1.10it/s][A
 26%|██▋       | 5/19 [00:01<00:03,  4.38it/s][A
 47%|████▋     | 9/19 [00:01<00:01,  5.97it/s][A
 68%|██████▊   | 13/19 [00:02<00:00,  6.30it/s][A
100%|██████████| 19/19 [00:02<00:00,  6.44it/s][A
Epochs:   0%|          | 0/87 [00:20<?, ?it/s]
[32m[I 2023-12-14 11:33:45,026][0m Trial 19 pruned. [0m


Val Loss: 7.7492 - Val Accuracy: 0.8426

Study statistics: 
  Number of finished trials:  20
  Number of pruned trials:  14
  Number of complete trials:  6


In [22]:
print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

Best trial:
  Value:  0.9251726865768433
  Params: 
    batch_size: 94
    epochs: 59
    epsilon: 3.39228871048199e-09
    learning_rate: 0.0003838612275375301
    loss_learning_rate: 0.0033107389307156145
    weight_decay: 0.0033012134894320976


In [None]:
# ViT P8-S8 CosFace Mean

Best trial:
Value:  0.9251726865768433
Params: 
batch_size: 94
epochs: 59
epsilon: 3.39228871048199e-09
learning_rate: 0.0003838612275375301
loss_learning_rate: 0.0033107389307156145
weight_decay: 0.0033012134894320976