In [1]:
import os
import random
import pandas as pd
import numpy as np
import mxnet as mx
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as opt
from torch.utils.data import Dataset, DataLoader
from einops import rearrange, repeat
import optuna
from optuna.trial import TrialState
from tqdm import tqdm

In [42]:
def file_to_embed(embeds, file):
    """Collect the cached embeddings of several files into one stacked tensor.

    Args:
        embeds: mapping from filename to a stored tensor; only element ``[0]``
            of each stored value is used.
        file: iterable of filenames, each of which must be a key of ``embeds``.

    Returns:
        The selected entries stacked with ``torch.stack`` in the order of ``file``.
    """
    return torch.stack([embeds[name][0] for name in file])

In [3]:
# Patch-count threshold: ViT_face asserts that num_patches > MIN_NUM_PATCHES.
MIN_NUM_PATCHES = 16

In [4]:
# Use CUDA device index 1 when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=1)

In [5]:
class AdienceDataset(Dataset):
    """Image dataset driven by a header-less CSV annotation file.

    Column 0 of the CSV holds the image filename (relative to ``img_dir``) and
    column 1 holds the label.

    With ``train=False`` an item is ``(image, label, img_file)``.
    With ``train=True`` an item is a triplet
    ``(image, pos_image, neg_image, label, img_file, positive_img, negative_img)``
    where the positive shares the anchor's label and the negative does not.

    Args:
        annot_file: path of the CSV annotation file (read without a header row).
        img_dir: directory that the filenames in column 0 are relative to.
        train: when True, ``__getitem__`` returns triplets instead of single samples.
    """

    # Size passed to ``mx.image.resize_short`` when dimension 1 of the decoded
    # image differs from it.
    IMAGE_SIZE = 112

    def __init__(self, annot_file, img_dir, train=False):
        self.img_lbls = pd.read_csv(annot_file, header=None)
        self.img_dir = img_dir
        self.is_train = train

    def __len__(self):
        return len(self.img_lbls)

    def _load_image(self, img_file):
        """Read one image file and return it as a float32 torch tensor."""
        image = mx.image.imread(os.path.join(self.img_dir, img_file))
        if image.shape[1] != self.IMAGE_SIZE:
            image = mx.image.resize_short(image, self.IMAGE_SIZE)
        # Move the last axis to the front (presumably HWC -> CHW; confirm
        # against mx.image.imread's layout).
        image = mx.nd.transpose(image, axes=(2, 0, 1))
        return torch.tensor(image.asnumpy()).type(torch.FloatTensor)

    def _sample_triplet_indices(self, idx, label):
        """Pick positional row indices of a random positive and negative sample.

        Positions (not index labels) are returned so they are always valid for
        ``.iloc``. If the anchor is the only sample with its label, the anchor
        itself is used as the positive rather than failing.

        Raises:
            IndexError: if no row has a label different from ``label``.
        """
        labels = self.img_lbls.iloc[:, 1].to_numpy()
        same_label = labels == label
        positives = np.flatnonzero(same_label)
        positives = positives[positives != idx]
        negatives = np.flatnonzero(~same_label)
        if negatives.size == 0:
            raise IndexError(
                "cannot sample a negative: every row has label {!r}".format(label)
            )
        # Draw the positive first, then the negative (same order as before).
        # Plain Python lists keep random.choice independent of how it treats arrays.
        pos_idx = random.choice(positives.tolist()) if positives.size else idx
        neg_idx = random.choice(negatives.tolist())
        return pos_idx, neg_idx

    def __getitem__(self, idx):
        img_file = self.img_lbls.iloc[idx, 0]
        image = self._load_image(img_file)
        label = self.img_lbls.iloc[idx, 1]

        if not self.is_train:
            return image, label, img_file

        pos_idx, neg_idx = self._sample_triplet_indices(idx, label)
        positive_img = self.img_lbls.iloc[pos_idx, 0]
        negative_img = self.img_lbls.iloc[neg_idx, 0]
        pos_image = self._load_image(positive_img)
        neg_image = self._load_image(negative_img)

        return image, pos_image, neg_image, label, img_file, positive_img, negative_img

In [6]:
# Plain (train=False) datasets: each item is (image, label, img_file).
train_data = AdienceDataset("../train.csv", "../cropped_Adience/", train=False)
val_data = AdienceDataset("../val.csv", "../cropped_Adience/", train=False)

In [20]:
# Rebind train_data / val_data to triplet mode (train=True): each item is now a
# 7-tuple (anchor, positive, negative, label and the three filenames), which is
# what the loops inside objective() unpack. Code that unpacks 3-tuple samples
# must not iterate these objects.
train_data = AdienceDataset("../train.csv", "../cropped_Adience/", train=True)
val_data = AdienceDataset("../val.csv", "../cropped_Adience/", train=True)

In [7]:
class TripletLoss(nn.Module):
    """Hinge-style triplet loss: mean of ``relu(d(a, p) - d(a, n) + margin)``.

    ``d`` is the squared L2 distance computed by :meth:`calc_euclidean`.
    """

    def __init__(self, margin=1.0):
        super().__init__()
        self.margin = margin

    def calc_euclidean(self, x1, x2):
        """Sum the squared element-wise differences over dim 1 (no square root)."""
        difference = x1 - x2
        return torch.pow(difference, 2).sum(dim=1)

    def forward(self, anchor, positive, negative):
        """Return the batch-mean triplet loss for the given embeddings."""
        gap = self.calc_euclidean(anchor, positive) - self.calc_euclidean(anchor, negative)
        return F.relu(gap + self.margin).mean()

In [8]:
class CombinedLoss(nn.Module):
    """Cross-entropy classification loss plus a ``beta``-weighted triplet loss.

    Args:
        beta: multiplier applied to the triplet term before it is added to the
            cross-entropy term.
    """

    def __init__(self, beta=1.0):
        super().__init__()
        self.beta = beta
        self.triplet = TripletLoss(margin=1.0)
        self.classification = nn.CrossEntropyLoss()

    def forward(self, anchor, positive, negative, classification_out, labels):
        """Return ``beta * triplet(anchor, positive, negative) + CE(classification_out, labels)``."""
        weighted_triplet = self.beta * self.triplet(anchor, positive, negative)
        return weighted_triplet + self.classification(classification_out, labels)

In [9]:
class CosFace(nn.Module):
    r"""CosFace large-margin cosine head (https://arxiv.org/pdf/1801.09414.pdf).

    For each sample the logit of its target class is ``s * (cos(theta) - m)``;
    every other logit is ``s * cos(theta)``, where ``cos(theta)`` is the cosine
    between the L2-normalised input and the L2-normalised class weight.

    Args:
        in_features: size of each input sample
        out_features: size of each output sample (number of classes)
        device_id: list of CUDA devices over which the class weights are split
            (model parallel). If ``None`` the computation runs on whatever
            device the input and the weight already live on.
        s: scale applied to the final logits
        m: cosine margin subtracted from the target-class cosine
    """

    def __init__(self, in_features, out_features, device_id, s=64.0, m=0.35):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.device_id = device_id
        self.s = s
        self.m = m
        print("self.device_id", self.device_id)
        self.weight = nn.Parameter(torch.FloatTensor(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)

    def forward(self, input, label):
        """Return margin-adjusted, scaled cosine logits of shape (batch, out_features)."""
        # --------------------------- cos(theta) ---------------------------
        if self.device_id is None:
            cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        else:
            # Split the class weights across the devices, compute each slice of
            # the cosine matrix on its own device, then gather on the first one.
            primary = self.device_id[0]
            sub_weights = torch.chunk(self.weight, len(self.device_id), dim=0)
            parts = [
                F.linear(F.normalize(input.cuda(dev)), F.normalize(sub.cuda(dev))).cuda(primary)
                for dev, sub in zip(self.device_id, sub_weights)
            ]
            cosine = torch.cat(parts, dim=1)

        # --------------------------- one-hot target mask ---------------------------
        # zeros_like keeps the mask on the same device/dtype as the logits, so
        # the device_id=None path also works when the input is on a GPU.
        one_hot = torch.zeros_like(cosine)
        one_hot.scatter_(1, label.view(-1, 1).long().to(cosine.device), 1)

        # Subtract the margin from the target-class cosine only, then scale.
        return self.s * (cosine - self.m * one_hot)

    def __repr__(self):
        return (
            f"{self.__class__.__name__}("
            f"in_features = {self.in_features}"
            f", out_features = {self.out_features}"
            f", s = {self.s}"
            f", m = {self.m})"
        )

In [10]:
class Residual(nn.Module):
    """Wrap ``fn`` with a skip connection: ``forward(x) = fn(x, **kwargs) + x``."""

    def __init__(self, fn):
        super().__init__()
        self.fn = fn

    def forward(self, x, **kwargs):
        branch = self.fn(x, **kwargs)
        return branch + x

In [11]:
class PreNorm(nn.Module):
    """Apply ``LayerNorm(dim)`` to the input, then call ``fn`` on the result.

    Keyword arguments given to ``forward`` are passed through to ``fn``.
    """

    def __init__(self, dim, fn):
        super().__init__()
        self.norm = nn.LayerNorm(dim)
        self.fn = fn

    def forward(self, x, **kwargs):
        normed = self.norm(x)
        return self.fn(normed, **kwargs)

In [12]:
class FeedForward(nn.Module):
    """Position-wise MLP: Linear -> GELU -> Dropout -> Linear -> Dropout.

    Args:
        dim: input and output feature size.
        hidden_dim: width of the intermediate layer.
        dropout: dropout probability used after the activation and after the
            output projection.
    """

    def __init__(self, dim, hidden_dim, dropout = 0.):
        super().__init__()
        # The order fixes the Sequential indices (0 and 3 hold the Linear
        # layers), which are part of the state-dict keys.
        stages = [
            nn.Linear(dim, hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, dim),
            nn.Dropout(dropout),
        ]
        self.net = nn.Sequential(*stages)

    def forward(self, x):
        return self.net(x)

In [13]:
class Attention(nn.Module):
    """Multi-head self-attention over a (batch, tokens, dim) input.

    Args:
        dim: input and output feature size.
        heads: number of attention heads.
        dim_head: feature size of each head.
        dropout: dropout probability applied after the output projection.
    """

    def __init__(self, dim, heads = 8, dim_head = 64, dropout = 0.):
        super().__init__()
        inner_dim = dim_head * heads
        self.heads = heads
        # NOTE(review): the scale is derived from ``dim``, not ``dim_head``.
        # Left unchanged on purpose: altering it would change the outputs of
        # weights that were trained with this scale.
        self.scale = dim ** -0.5

        self.to_qkv = nn.Linear(dim, inner_dim * 3, bias = False)
        self.to_out = nn.Sequential(
            nn.Linear(inner_dim, dim),
            nn.Dropout(dropout)
        )

    def forward(self, x, mask = None):
        """Attend over the token axis of ``x``.

        Args:
            x: tensor of shape (batch, tokens, dim).
            mask: optional boolean tensor that flattens to (batch, tokens - 1);
                a ``True`` column is prepended for the first token. A score
                (i, j) is kept only when both token i and token j are ``True``.

        Returns:
            Tensor of shape (batch, tokens, dim).
        """
        b, n, _ = x.shape
        h = self.heads

        # Split the fused projection into q, k, v and move heads to axis 1:
        # (b, n, h * d) -> (b, h, n, d).
        q, k, v = (
            t.reshape(b, n, h, -1).transpose(1, 2)
            for t in self.to_qkv(x).chunk(3, dim = -1)
        )
        dots = torch.einsum('bhid,bhjd->bhij', q, k) * self.scale

        if mask is not None:
            mask = F.pad(mask.flatten(1), (1, 0), value = True)
            assert mask.shape[-1] == dots.shape[-1], 'mask has incorrect dimensions'
            # Shape (b, 1, n, n): the singleton axis broadcasts over the heads.
            pair_mask = mask[:, None, :, None] & mask[:, None, None, :]
            dots.masked_fill_(~pair_mask, -torch.finfo(dots.dtype).max)

        attn = dots.softmax(dim=-1)

        out = torch.einsum('bhij,bhjd->bhid', attn, v)
        # Merge the heads back: (b, h, n, d) -> (b, n, h * d).
        out = out.transpose(1, 2).reshape(b, n, -1)
        return self.to_out(out)

In [14]:
class Transformer(nn.Module):
    """Stack of ``depth`` pre-norm residual (attention, feed-forward) pairs."""

    def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout):
        super().__init__()
        # Each entry is [attention block, feed-forward block]; the list
        # positions are part of the state-dict keys.
        self.layers = nn.ModuleList([
            nn.ModuleList([
                Residual(PreNorm(dim, Attention(dim, heads = heads, dim_head = dim_head, dropout = dropout))),
                Residual(PreNorm(dim, FeedForward(dim, mlp_dim, dropout = dropout)))
            ])
            for _ in range(depth)
        ])

    def forward(self, x, mask = None):
        """Run ``x`` through every layer; ``mask`` is passed to the attention blocks only."""
        for attention_block, feedforward_block in self.layers:
            x = feedforward_block(attention_block(x, mask = mask))
        return x

In [15]:
class ViT_face(nn.Module):
    """Vision Transformer backbone producing a face embedding.

    The image is cut into ``patch_size`` x ``patch_size`` patches, linearly
    embedded, prefixed with a learnable class token, given learnable position
    embeddings and passed through a Transformer. ``forward`` returns the
    layer-normalised class token concatenated with the layer-normalised pooled
    token, i.e. a tensor of shape (batch, 2 * dim).

    Args (keyword-only):
        loss_type: ``'CosFace'`` attaches a CosFace head; the string ``'None'``
            attaches none. Any other value also leaves the model without a head.
        GPU_ID: forwarded to ``CosFace`` as ``device_id``.
        num_class: number of classes of the CosFace head.
        image_size: input side length; must be divisible by ``patch_size``.
        patch_size: side length of a patch.
        dim: token embedding size.
        depth, heads, mlp_dim, dim_head, dropout: Transformer hyper-parameters.
        pool: ``'mean'`` pools the patch tokens, ``'cls'`` reuses the class token.
        channels: number of image channels.
        emb_dropout: dropout applied to the embedded token sequence.
    """

    def __init__(self, *, loss_type, GPU_ID, num_class, image_size, patch_size, dim, depth, heads, mlp_dim, pool = 'mean', channels = 3, dim_head = 64, dropout = 0., emb_dropout = 0.):
        super().__init__()
        assert image_size % patch_size == 0, 'Image dimensions must be divisible by the patch size.'
        num_patches = (image_size // patch_size) ** 2
        patch_dim = channels * patch_size ** 2
        assert num_patches > MIN_NUM_PATCHES, f'your number of patches ({num_patches}) is way too small for attention to be effective (at least 16). Try decreasing your patch size'
        assert pool in {'cls', 'mean'}, 'pool type must be either cls (cls token) or mean (mean pooling)'

        self.patch_size = patch_size

        self.pos_embedding = nn.Parameter(torch.randn(1, num_patches + 1, dim))
        self.patch_to_embedding = nn.Linear(patch_dim, dim)
        self.cls_token = nn.Parameter(torch.randn(1, 1, dim))
        self.dropout = nn.Dropout(emb_dropout)

        self.transformer = Transformer(dim, depth, heads, dim_head, mlp_dim, dropout)

        self.pool = pool
        self.to_latent = nn.Identity()

        self.mlp_head = nn.Sequential(
            nn.LayerNorm(dim),
        )
        self.loss_type = loss_type
        self.GPU_ID = GPU_ID
        if self.loss_type == 'CosFace':
            self.loss = CosFace(in_features=dim, out_features=num_class, device_id=self.GPU_ID)
        else:
            # Always define the attribute so forward() can report a missing
            # head explicitly instead of failing with an AttributeError.
            self.loss = None
            if self.loss_type == 'None':
                print("no loss for vit_face")

    def forward(self, img, label=None, mask=None):
        """Embed a batch of images.

        Args:
            img: tensor of shape (batch, channels, H, W) with H and W divisible
                by ``patch_size``.
            label: optional class labels; when given, the loss head is applied.
            mask: optional attention mask forwarded to the Transformer.

        Returns:
            ``emb`` of shape (batch, 2 * dim) when ``label`` is None, otherwise
            the tuple ``(loss_head_output, emb)``.

        Raises:
            ValueError: if ``label`` is given but the model has no loss head.
        """
        p = self.patch_size

        x = rearrange(img, 'b c (h p1) (w p2) -> b (h w) (p1 p2 c)', p1 = p, p2 = p)
        x = self.patch_to_embedding(x)
        b, n, _ = x.shape

        cls_tokens = repeat(self.cls_token, '() n d -> b n d', b = b)
        x = torch.cat((cls_tokens, x), dim=1)
        x += self.pos_embedding[:, :(n + 1)]
        x = self.dropout(x)
        x = self.transformer(x, mask)

        # y: class token; z: mean over the patch tokens (class token excluded)
        # for pool='mean', or the class token again for pool='cls'.
        y = x[:, 0]
        z = x[:, 1:].mean(dim = 1) if self.pool == 'mean' else x[:, 0]

        emb_y = self.mlp_head(self.to_latent(y))
        emb_z = self.mlp_head(self.to_latent(z))
        emb = torch.cat((emb_y, emb_z), dim=1)

        if label is None:
            return emb
        if self.loss is None:
            raise ValueError(
                "labels were passed but no loss head is configured "
                "(loss_type={!r})".format(self.loss_type)
            )
        # NOTE(review): emb has 2 * dim features while the CosFace head is built
        # with in_features=dim, so this call looks shape-incompatible; confirm
        # the intended input before training with labels.
        return self.loss(emb, label), emb

In [23]:
class ViT_plus(nn.Module):
    """Two linear layers on top of a 1024-d embedding.

    ``fc1`` maps 1024 -> 1024 and its output is returned as the metric-learning
    embedding; ``fc2`` maps that output to 2 classification logits. No
    activation is applied between the two layers.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(1024, 1024)
        self.fc2 = nn.Linear(1024, 2)

    def forward(self, x):
        """Return ``(fc1(x), fc2(fc1(x)))``."""
        projected = self.fc1(x)
        return projected, self.fc2(projected)

In [17]:
# Build the ViT backbone with a CosFace head and move it to `device`.
# GPU_ID is passed on to CosFace as its device list, so the head runs on `device` too.
# The hyper-parameters (including num_class, which sizes the CosFace weight)
# presumably have to match the checkpoint loaded below for load_state_dict to succeed.
model = ViT_face(
            image_size=112,
            patch_size=8,
            loss_type='CosFace',
            GPU_ID= [device],
            num_class=93431,
            dim=512,
            depth=20,
            heads=8,
            mlp_dim=2048,
            dropout=0.1,
            emb_dropout=0.1
        ).to(device)
# Load the saved backbone weights, remapping stored tensors onto `device`.
model.load_state_dict(
    torch.load("../Backbone_VIT_Epoch_2_Batch_20000_Time_2021-01-12-16-48_checkpoint.pth", map_location=device)
)

self.device_id [device(type='cuda', index=1)]


<All keys matched successfully>

In [18]:
# Freeze the backbone: no parameter of `model` will receive gradients.
for param in model.parameters():
    param.requires_grad = False

In [19]:
# Cache one backbone embedding per image file, keyed by filename.
# The datasets are built here in plain (train=False) mode so that every item is
# (image, label, img_file) regardless of which mode the shared train_data /
# val_data variables are in when this cell runs.
embed_sources = [
    AdienceDataset("../train.csv", "../cropped_Adience/", train=False),
    AdienceDataset("../val.csv", "../cropped_Adience/", train=False),
]

embeds = {}
model.eval()

with torch.no_grad():
    for dataset in embed_sources:
        for sample_idx in range(len(dataset)):
            img, _label, file = dataset[sample_idx]
            # Add a batch axis of size 1; the stored value has shape (1, features).
            embeds[file] = model(torch.unsqueeze(img.to(device), 0))

In [46]:
# Validation accuracy a finished trial has to exceed before objective() saves
# its weights; objective() updates this value whenever it saves.
best_accu = 0.9446031451225281
def _run_epoch(model_xtr, criterion, batches, optimizer=None):
    """Run one pass over ``batches`` and return ``(mean_loss, mean_accuracy)``.

    With an ``optimizer`` the model is put in train mode and updated after every
    batch; without one it is put in eval mode and gradients are disabled.
    Both returned values are unweighted means of the per-batch values.
    """
    training = optimizer is not None
    model_xtr.train(training)

    batch_losses = []
    batch_accus = []
    with torch.set_grad_enabled(training):
        # The image tensors in each batch are not needed: the head is fed the
        # cached backbone embeddings looked up by filename.
        for _img, _pos_img, _neg_img, label, img_file, pos_file, neg_file in batches:
            label = label.to(device)

            x1 = file_to_embed(embeds, img_file)
            x2 = file_to_embed(embeds, pos_file)
            x3 = file_to_embed(embeds, neg_file)

            if training:
                optimizer.zero_grad()
            anchor, output = model_xtr(x1)
            pos, _ = model_xtr(x2)
            neg, _ = model_xtr(x3)

            loss = criterion(anchor, pos, neg, output, label)
            if training:
                loss.backward()
                optimizer.step()

            pred = torch.argmax(output, 1)
            accuracy = torch.eq(pred, label).sum() / len(label)

            batch_accus.append(accuracy.item())
            batch_losses.append(loss.item())

    return float(np.mean(batch_losses)), float(np.mean(batch_accus))


def objective(trial):
    """Optuna objective: train a ``ViT_plus`` head and return its validation accuracy.

    Samples learning rate, weight decay, AdamW epsilon, batch size and epoch
    count from ``trial``, trains on ``train_data`` with ``CombinedLoss`` and
    evaluates on ``val_data`` after every epoch. The per-epoch validation
    accuracy is reported to the trial so it can be pruned. If the final
    validation accuracy beats the module-level ``best_accu``, the weights are
    saved and ``best_accu`` is updated.

    Returns:
        Validation accuracy of the last epoch.

    Raises:
        optuna.exceptions.TrialPruned: when the pruner stops the trial.
    """
    global best_accu

    model_xtr = ViT_plus().to(device)

    lr = trial.suggest_float("learning_rate", 1e-5, 1e-1, log=True)
    wd = trial.suggest_float('weight_decay', 1e-4, 1e-2, log=True)
    eps = trial.suggest_float("epsilon", 1e-9, 1e-7, log=True)
    optimizer = opt.AdamW(model_xtr.parameters(), lr=lr, eps=eps, weight_decay=wd)

    criterion = CombinedLoss().to(device)

    batch_size = trial.suggest_int('batch_size', 10, 100)
    num_epochs = trial.suggest_int('epochs', 10, 100)

    print("Learning rate: "+ str(lr))
    print("Weight decay: "+ str(wd))
    print("Epsilon: "+ str(eps))
    print("Batch size: "+ str(batch_size))
    print("Number of epochs: "+ str(num_epochs))

    # The loaders do not change between epochs, so build them once per trial.
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=4)
    val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False, num_workers=4)

    for epoch in tqdm(range(num_epochs), desc="Epochs"):
        train_loss, train_accu = _run_epoch(
            model_xtr, criterion, tqdm(train_loader, desc="Training", leave=False), optimizer
        )
        print("Epoch: {}/{} - Loss: {:.4f} - Accuracy: {:.4f}".format(epoch+1, num_epochs, train_loss, train_accu))

        val_loss, val_accu = _run_epoch(model_xtr, criterion, tqdm(val_loader))
        print("Val Loss: {:.4f} - Val Accuracy: {:.4f}".format(val_loss, val_accu))

        trial.report(val_accu, epoch)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    if val_accu > best_accu:
        best_accu = val_accu
        print("Saving best model...")
        torch.save(model_xtr.state_dict(), "../vit_8-8_triplet_mean.pt")

    return val_accu

In [47]:
# Create the Optuna study, or reopen the one with the same name already stored
# in study.db (load_if_exists=True), and run 10 more trials maximising the
# value returned by objective().
study = optuna.create_study(direction='maximize',
                            study_name='triplet-8-8-mean-vit-study',
                            storage='sqlite:///study.db',
                            load_if_exists=True)
study.optimize(objective, n_trials=10)

pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

# Display the study statistics
# NOTE(review): len(study.trials) counts every trial stored in the study, which
# presumably includes trials from earlier runs when the study was reopened.
print("\nStudy statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

[32m[I 2023-12-07 09:26:12,562][0m Using an existing study with name 'triplet-8-8-mean-vit-study' instead of creating a new one.[0m


Learning rate: 0.00025553954183956626
Weight decay: 0.00036438686299700114
Epsilon: 8.811775566096176e-08
Batch size: 48
Number of epochs: 12


Epochs:   0%|          | 0/12 [00:00<?, ?it/s]
Training:   0%|          | 0/292 [00:00<?, ?it/s][A
Training:   0%|          | 1/292 [00:00<03:57,  1.23it/s][A
Training:   2%|▏         | 5/292 [00:01<01:04,  4.46it/s][A
Training:   3%|▎         | 9/292 [00:01<00:46,  6.06it/s][A
Training:   4%|▍         | 13/292 [00:02<00:40,  6.88it/s][A
Training:   6%|▌         | 17/292 [00:02<00:37,  7.33it/s][A
Training:   7%|▋         | 21/292 [00:03<00:35,  7.59it/s][A
Training:   9%|▊         | 25/292 [00:03<00:34,  7.69it/s][A
Training:  10%|▉         | 29/292 [00:04<00:33,  7.92it/s][A
Training:  11%|█▏        | 33/292 [00:04<00:32,  8.09it/s][A
Training:  13%|█▎        | 37/292 [00:05<00:31,  8.15it/s][A
Training:  14%|█▍        | 41/292 [00:05<00:30,  8.28it/s][A
Training:  15%|█▌        | 45/292 [00:06<00:29,  8.35it/s][A
Training:  17%|█▋        | 49/292 [00:06<00:29,  8.24it/s][A
Training:  18%|█▊        | 53/292 [00:07<00:29,  8.22it/s][A
Training:  20%|█▉        | 57/292 [

Epoch: 1/12 - Loss: 7.7887 - Accuracy: 0.8789



  0%|          | 0/37 [00:00<?, ?it/s][A
  3%|▎         | 1/37 [00:00<00:27,  1.31it/s][A
 14%|█▎        | 5/37 [00:01<00:06,  4.63it/s][A
 24%|██▍       | 9/37 [00:01<00:04,  6.09it/s][A
 35%|███▌      | 13/37 [00:02<00:03,  6.91it/s][A
 46%|████▌     | 17/37 [00:02<00:02,  7.40it/s][A
 57%|█████▋    | 21/37 [00:03<00:02,  7.72it/s][A
 68%|██████▊   | 25/37 [00:03<00:01,  7.93it/s][A
 78%|███████▊  | 29/37 [00:04<00:00,  8.13it/s][A
 89%|████████▉ | 33/37 [00:04<00:00,  8.29it/s][A
100%|██████████| 37/37 [00:04<00:00,  7.71it/s][A
Epochs:   8%|▊         | 1/12 [00:40<07:26, 40.62s/it]

Val Loss: 5.5887 - Val Accuracy: 0.9247



Training:   0%|          | 0/292 [00:00<?, ?it/s][A
Training:   0%|          | 1/292 [00:00<03:51,  1.26it/s][A
Training:   2%|▏         | 5/292 [00:01<01:02,  4.61it/s][A
Training:   3%|▎         | 9/292 [00:01<00:46,  6.08it/s][A
Training:   4%|▍         | 13/292 [00:02<00:40,  6.96it/s][A
Training:   6%|▌         | 17/292 [00:02<00:37,  7.37it/s][A
Training:   7%|▋         | 21/292 [00:03<00:34,  7.79it/s][A
Training:   9%|▊         | 25/292 [00:03<00:33,  7.99it/s][A
Training:  10%|▉         | 29/292 [00:04<00:32,  8.12it/s][A
Training:  11%|█▏        | 33/292 [00:04<00:31,  8.24it/s][A
Training:  13%|█▎        | 37/292 [00:05<00:30,  8.36it/s][A
Training:  14%|█▍        | 41/292 [00:05<00:29,  8.38it/s][A
Training:  15%|█▌        | 45/292 [00:05<00:29,  8.44it/s][A
Training:  17%|█▋        | 49/292 [00:06<00:29,  8.33it/s][A
Training:  18%|█▊        | 53/292 [00:06<00:28,  8.31it/s][A
Training:  20%|█▉        | 57/292 [00:07<00:28,  8.34it/s][A
Training:  21%|██  

Epoch: 2/12 - Loss: 4.7009 - Accuracy: 0.9368



  0%|          | 0/37 [00:00<?, ?it/s][A
  3%|▎         | 1/37 [00:00<00:27,  1.31it/s][A
 14%|█▎        | 5/37 [00:01<00:06,  4.71it/s][A
 24%|██▍       | 9/37 [00:01<00:04,  6.23it/s][A
 35%|███▌      | 13/37 [00:02<00:03,  7.01it/s][A
 46%|████▌     | 17/37 [00:02<00:02,  7.47it/s][A
 57%|█████▋    | 21/37 [00:03<00:02,  7.87it/s][A
 68%|██████▊   | 25/37 [00:03<00:01,  8.00it/s][A
 78%|███████▊  | 29/37 [00:04<00:00,  8.12it/s][A
 89%|████████▉ | 33/37 [00:04<00:00,  8.21it/s][A
100%|██████████| 37/37 [00:04<00:00,  7.76it/s][A
Epochs:  17%|█▋        | 2/12 [01:20<06:43, 40.36s/it]

Val Loss: 4.7864 - Val Accuracy: 0.9366



Training:   0%|          | 0/292 [00:00<?, ?it/s][A
Training:   0%|          | 1/292 [00:00<03:45,  1.29it/s][A
Training:   2%|▏         | 5/292 [00:01<01:02,  4.62it/s][A
Training:   3%|▎         | 9/292 [00:01<00:46,  6.13it/s][A
Training:   4%|▍         | 13/292 [00:02<00:40,  6.94it/s][A
Training:   6%|▌         | 17/292 [00:02<00:37,  7.33it/s][A
Training:   7%|▋         | 21/292 [00:03<00:35,  7.66it/s][A
Training:   9%|▊         | 25/292 [00:03<00:34,  7.83it/s][A
Training:  10%|▉         | 29/292 [00:04<00:32,  8.07it/s][A
Training:  11%|█▏        | 33/292 [00:04<00:31,  8.16it/s][A
Training:  13%|█▎        | 37/292 [00:05<00:31,  8.21it/s][A
Training:  14%|█▍        | 41/292 [00:05<00:30,  8.28it/s][A
Training:  15%|█▌        | 45/292 [00:06<00:29,  8.36it/s][A
Training:  17%|█▋        | 49/292 [00:06<00:28,  8.43it/s][A
Training:  18%|█▊        | 53/292 [00:06<00:28,  8.33it/s][A
Training:  20%|█▉        | 57/292 [00:07<00:28,  8.22it/s][A
Training:  21%|██  

Epoch: 3/12 - Loss: 3.8988 - Accuracy: 0.9419



  0%|          | 0/37 [00:00<?, ?it/s][A
  3%|▎         | 1/37 [00:00<00:26,  1.38it/s][A
 14%|█▎        | 5/37 [00:01<00:06,  4.78it/s][A
 24%|██▍       | 9/37 [00:01<00:04,  6.27it/s][A
 35%|███▌      | 13/37 [00:02<00:03,  7.10it/s][A
 46%|████▌     | 17/37 [00:02<00:02,  7.53it/s][A
 57%|█████▋    | 21/37 [00:03<00:02,  7.73it/s][A
 68%|██████▊   | 25/37 [00:03<00:01,  7.92it/s][A
 78%|███████▊  | 29/37 [00:04<00:00,  8.09it/s][A
 89%|████████▉ | 33/37 [00:04<00:00,  8.25it/s][A
100%|██████████| 37/37 [00:04<00:00,  7.74it/s][A
Epochs:  25%|██▌       | 3/12 [02:01<06:02, 40.33s/it]

Val Loss: 4.0082 - Val Accuracy: 0.9354



Training:   0%|          | 0/292 [00:00<?, ?it/s][A
Training:   0%|          | 1/292 [00:00<03:49,  1.27it/s][A
Training:   2%|▏         | 5/292 [00:01<01:03,  4.53it/s][A
Training:   3%|▎         | 9/292 [00:01<00:46,  6.03it/s][A
Training:   4%|▍         | 13/292 [00:02<00:40,  6.85it/s][A
Training:   6%|▌         | 17/292 [00:02<00:37,  7.33it/s][A
Training:   7%|▋         | 21/292 [00:03<00:35,  7.69it/s][A
Training:   9%|▊         | 25/292 [00:03<00:33,  7.91it/s][A
Training:  10%|▉         | 29/292 [00:04<00:32,  8.14it/s][A
Training:  11%|█▏        | 33/292 [00:04<00:31,  8.20it/s][A
Training:  13%|█▎        | 37/292 [00:05<00:30,  8.30it/s][A
Training:  14%|█▍        | 41/292 [00:05<00:29,  8.39it/s][A
Training:  15%|█▌        | 45/292 [00:06<00:29,  8.46it/s][A
Training:  17%|█▋        | 49/292 [00:06<00:28,  8.44it/s][A
Training:  18%|█▊        | 53/292 [00:06<00:28,  8.40it/s][A
Training:  20%|█▉        | 57/292 [00:07<00:28,  8.25it/s][A
Training:  21%|██  

Epoch: 4/12 - Loss: 3.5075 - Accuracy: 0.9446



  0%|          | 0/37 [00:00<?, ?it/s][A
  3%|▎         | 1/37 [00:00<00:26,  1.37it/s][A
 14%|█▎        | 5/37 [00:01<00:06,  4.83it/s][A
 24%|██▍       | 9/37 [00:01<00:04,  6.32it/s][A
 35%|███▌      | 13/37 [00:02<00:03,  7.14it/s][A
 46%|████▌     | 17/37 [00:02<00:02,  7.53it/s][A
 57%|█████▋    | 21/37 [00:03<00:02,  7.87it/s][A
 68%|██████▊   | 25/37 [00:03<00:01,  8.00it/s][A
 78%|███████▊  | 29/37 [00:04<00:00,  8.17it/s][A
 89%|████████▉ | 33/37 [00:04<00:00,  8.27it/s][A
100%|██████████| 37/37 [00:04<00:00,  7.82it/s][A
Epochs:  33%|███▎      | 4/12 [02:41<05:21, 40.23s/it]

Val Loss: 4.2551 - Val Accuracy: 0.9372



Training:   0%|          | 0/292 [00:00<?, ?it/s][A
Training:   0%|          | 1/292 [00:00<03:48,  1.27it/s][A
Training:   2%|▏         | 5/292 [00:01<01:02,  4.57it/s][A
Training:   3%|▎         | 9/292 [00:01<00:46,  6.11it/s][A
Training:   4%|▍         | 13/292 [00:02<00:40,  6.89it/s][A
Training:   6%|▌         | 17/292 [00:02<00:37,  7.36it/s][A
Training:   7%|▋         | 21/292 [00:03<00:35,  7.71it/s][A
Training:   9%|▊         | 25/292 [00:03<00:33,  7.93it/s][A
Training:  10%|▉         | 29/292 [00:04<00:32,  8.06it/s][A
Training:  11%|█▏        | 33/292 [00:04<00:31,  8.10it/s][A
Training:  13%|█▎        | 37/292 [00:05<00:30,  8.23it/s][A
Training:  14%|█▍        | 41/292 [00:05<00:30,  8.35it/s][A
Training:  15%|█▌        | 45/292 [00:06<00:29,  8.41it/s][A
Training:  17%|█▋        | 49/292 [00:06<00:28,  8.49it/s][A
Training:  18%|█▊        | 53/292 [00:06<00:28,  8.37it/s][A
Training:  20%|█▉        | 57/292 [00:07<00:28,  8.34it/s][A
Training:  21%|██  

Epoch: 5/12 - Loss: 3.2224 - Accuracy: 0.9477



  0%|          | 0/37 [00:00<?, ?it/s][A
  3%|▎         | 1/37 [00:00<00:25,  1.39it/s][A
 14%|█▎        | 5/37 [00:01<00:06,  4.86it/s][A
 24%|██▍       | 9/37 [00:01<00:04,  6.40it/s][A
 35%|███▌      | 13/37 [00:02<00:03,  7.15it/s][A
 46%|████▌     | 17/37 [00:02<00:02,  7.60it/s][A
 57%|█████▋    | 21/37 [00:03<00:02,  7.89it/s][A
 68%|██████▊   | 25/37 [00:03<00:01,  8.11it/s][A
 78%|███████▊  | 29/37 [00:04<00:00,  8.19it/s][A
 89%|████████▉ | 33/37 [00:04<00:00,  8.29it/s][A
100%|██████████| 37/37 [00:04<00:00,  7.86it/s][A
Epochs:  42%|████▏     | 5/12 [03:21<04:41, 40.16s/it]

Val Loss: 3.5575 - Val Accuracy: 0.9377



Training:   0%|          | 0/292 [00:00<?, ?it/s][A
Training:   0%|          | 1/292 [00:00<03:38,  1.33it/s][A
Training:   2%|▏         | 5/292 [00:01<01:00,  4.72it/s][A
Training:   3%|▎         | 9/292 [00:01<00:44,  6.31it/s][A
Training:   4%|▍         | 13/292 [00:02<00:39,  6.98it/s][A
Training:   6%|▌         | 17/292 [00:02<00:37,  7.37it/s][A
Training:   7%|▋         | 21/292 [00:03<00:35,  7.68it/s][A
Training:   9%|▊         | 25/292 [00:03<00:33,  7.98it/s][A
Training:  10%|▉         | 29/292 [00:04<00:32,  8.17it/s][A
Training:  11%|█▏        | 33/292 [00:04<00:31,  8.25it/s][A
Training:  13%|█▎        | 37/292 [00:05<00:30,  8.28it/s][A
Training:  14%|█▍        | 41/292 [00:05<00:30,  8.34it/s][A
Training:  15%|█▌        | 45/292 [00:06<00:30,  8.19it/s][A
Training:  17%|█▋        | 49/292 [00:06<00:29,  8.24it/s][A
Training:  18%|█▊        | 53/292 [00:06<00:29,  8.22it/s][A
Training:  20%|█▉        | 57/292 [00:07<00:28,  8.28it/s][A
Training:  21%|██  

Epoch: 6/12 - Loss: 3.1323 - Accuracy: 0.9484



  0%|          | 0/37 [00:00<?, ?it/s][A
  3%|▎         | 1/37 [00:00<00:26,  1.38it/s][A
 14%|█▎        | 5/37 [00:01<00:06,  4.78it/s][A
 24%|██▍       | 9/37 [00:01<00:04,  6.26it/s][A
 35%|███▌      | 13/37 [00:02<00:03,  6.97it/s][A
 46%|████▌     | 17/37 [00:02<00:02,  7.35it/s][A
 57%|█████▋    | 21/37 [00:03<00:02,  7.56it/s][A
 68%|██████▊   | 25/37 [00:03<00:01,  7.71it/s][A
 78%|███████▊  | 29/37 [00:04<00:01,  7.80it/s][A
 89%|████████▉ | 33/37 [00:04<00:00,  7.94it/s][A
100%|██████████| 37/37 [00:04<00:00,  7.59it/s][A
Epochs:  50%|█████     | 6/12 [04:01<04:01, 40.20s/it]

Val Loss: 3.4691 - Val Accuracy: 0.9338



Training:   0%|          | 0/292 [00:00<?, ?it/s][A
Training:   0%|          | 1/292 [00:00<03:55,  1.24it/s][A
Training:   2%|▏         | 5/292 [00:01<01:06,  4.32it/s][A
Training:   3%|▎         | 9/292 [00:01<00:49,  5.76it/s][A
Training:   4%|▍         | 13/292 [00:02<00:42,  6.49it/s][A
Training:   6%|▌         | 17/292 [00:02<00:39,  6.94it/s][A
Training:   7%|▋         | 21/292 [00:03<00:37,  7.21it/s][A
Training:   9%|▊         | 25/292 [00:03<00:36,  7.35it/s][A
Training:  10%|▉         | 29/292 [00:04<00:35,  7.47it/s][A
Training:  11%|█▏        | 33/292 [00:04<00:34,  7.60it/s][A
Training:  13%|█▎        | 37/292 [00:05<00:33,  7.66it/s][A
Training:  14%|█▍        | 41/292 [00:05<00:32,  7.70it/s][A
Training:  15%|█▌        | 45/292 [00:06<00:31,  7.76it/s][A
Training:  17%|█▋        | 49/292 [00:06<00:31,  7.74it/s][A
Training:  18%|█▊        | 53/292 [00:07<00:30,  7.74it/s][A
Training:  20%|█▉        | 57/292 [00:08<00:30,  7.73it/s][A
Training:  21%|██  

Epoch: 7/12 - Loss: 2.8408 - Accuracy: 0.9494



  0%|          | 0/37 [00:00<?, ?it/s][A
  3%|▎         | 1/37 [00:00<00:27,  1.31it/s][A
 14%|█▎        | 5/37 [00:01<00:07,  4.56it/s][A
 24%|██▍       | 9/37 [00:01<00:04,  5.98it/s][A
 35%|███▌      | 13/37 [00:02<00:03,  6.79it/s][A
 46%|████▌     | 17/37 [00:02<00:02,  7.24it/s][A
 57%|█████▋    | 21/37 [00:03<00:02,  7.58it/s][A
 68%|██████▊   | 25/37 [00:03<00:01,  7.72it/s][A
 78%|███████▊  | 29/37 [00:04<00:01,  7.85it/s][A
 89%|████████▉ | 33/37 [00:04<00:00,  7.94it/s][A
100%|██████████| 37/37 [00:04<00:00,  7.50it/s][A
Epochs:  58%|█████▊    | 7/12 [04:43<03:24, 40.95s/it]

Val Loss: 3.2290 - Val Accuracy: 0.9428



Training:   0%|          | 0/292 [00:00<?, ?it/s][A
Training:   0%|          | 1/292 [00:00<03:50,  1.26it/s][A
Training:   2%|▏         | 5/292 [00:01<01:03,  4.49it/s][A
Training:   3%|▎         | 9/292 [00:01<00:47,  5.98it/s][A
Training:   4%|▍         | 13/292 [00:02<00:41,  6.74it/s][A
Training:   6%|▌         | 17/292 [00:02<00:38,  7.24it/s][A
Training:   7%|▋         | 21/292 [00:03<00:36,  7.46it/s][A
Training:   9%|▊         | 25/292 [00:03<00:34,  7.66it/s][A
Training:  10%|▉         | 29/292 [00:04<00:33,  7.77it/s][A
Training:  11%|█▏        | 33/292 [00:04<00:32,  7.94it/s][A
Training:  13%|█▎        | 37/292 [00:05<00:31,  8.11it/s][A
Training:  14%|█▍        | 41/292 [00:05<00:30,  8.11it/s][A
Training:  15%|█▌        | 45/292 [00:06<00:30,  8.09it/s][A
Training:  17%|█▋        | 49/292 [00:06<00:30,  8.07it/s][A
Training:  18%|█▊        | 53/292 [00:07<00:29,  8.11it/s][A
Training:  20%|█▉        | 57/292 [00:07<00:29,  8.08it/s][A
Training:  21%|██  

Epoch: 8/12 - Loss: 2.9249 - Accuracy: 0.9499



  0%|          | 0/37 [00:00<?, ?it/s][A
  3%|▎         | 1/37 [00:00<00:26,  1.34it/s][A
 14%|█▎        | 5/37 [00:01<00:06,  4.70it/s][A
 24%|██▍       | 9/37 [00:01<00:04,  6.16it/s][A
 35%|███▌      | 13/37 [00:02<00:03,  6.88it/s][A
 46%|████▌     | 17/37 [00:02<00:02,  7.38it/s][A
 57%|█████▋    | 21/37 [00:03<00:02,  7.64it/s][A
 68%|██████▊   | 25/37 [00:03<00:01,  7.86it/s][A
 78%|███████▊  | 29/37 [00:04<00:01,  7.95it/s][A
 89%|████████▉ | 33/37 [00:04<00:00,  8.04it/s][A
100%|██████████| 37/37 [00:04<00:00,  7.63it/s][A
Epochs:  67%|██████▋   | 8/12 [05:24<02:43, 40.94s/it]

Val Loss: 3.2135 - Val Accuracy: 0.9416



Training:   0%|          | 0/292 [00:00<?, ?it/s][A
Training:   0%|          | 1/292 [00:00<03:47,  1.28it/s][A
Training:   2%|▏         | 5/292 [00:01<01:02,  4.60it/s][A
Training:   3%|▎         | 9/292 [00:01<00:46,  6.04it/s][A
Training:   4%|▍         | 13/292 [00:02<00:40,  6.82it/s][A
Training:   6%|▌         | 17/292 [00:02<00:37,  7.29it/s][A
Training:   7%|▋         | 21/292 [00:03<00:35,  7.62it/s][A
Training:   9%|▊         | 25/292 [00:03<00:34,  7.83it/s][A
Training:  10%|▉         | 29/292 [00:04<00:33,  7.93it/s][A
Training:  11%|█▏        | 33/292 [00:04<00:32,  7.99it/s][A
Training:  13%|█▎        | 37/292 [00:05<00:31,  8.05it/s][A
Training:  14%|█▍        | 41/292 [00:05<00:30,  8.10it/s][A
Training:  15%|█▌        | 45/292 [00:06<00:30,  8.10it/s][A
Training:  17%|█▋        | 49/292 [00:06<00:29,  8.11it/s][A
Training:  18%|█▊        | 53/292 [00:07<00:29,  8.18it/s][A
Training:  20%|█▉        | 57/292 [00:07<00:28,  8.23it/s][A
Training:  21%|██  

Epoch: 9/12 - Loss: 2.7267 - Accuracy: 0.9513



  0%|          | 0/37 [00:00<?, ?it/s][A
  3%|▎         | 1/37 [00:00<00:26,  1.35it/s][A
 14%|█▎        | 5/37 [00:01<00:06,  4.72it/s][A
 24%|██▍       | 9/37 [00:01<00:04,  6.26it/s][A
 35%|███▌      | 13/37 [00:02<00:03,  7.01it/s][A
 46%|████▌     | 17/37 [00:02<00:02,  7.53it/s][A
 57%|█████▋    | 21/37 [00:03<00:02,  7.77it/s][A
 68%|██████▊   | 25/37 [00:03<00:01,  7.96it/s][A
 78%|███████▊  | 29/37 [00:04<00:00,  8.06it/s][A
 89%|████████▉ | 33/37 [00:04<00:00,  8.13it/s][A
100%|██████████| 37/37 [00:04<00:00,  7.72it/s][A
Epochs:  75%|███████▌  | 9/12 [06:05<02:02, 40.94s/it]

Val Loss: 3.3598 - Val Accuracy: 0.9405



Training:   0%|          | 0/292 [00:00<?, ?it/s][A
Training:   0%|          | 1/292 [00:00<03:45,  1.29it/s][A
Training:   2%|▏         | 5/292 [00:01<01:01,  4.65it/s][A
Training:   3%|▎         | 9/292 [00:01<00:46,  6.11it/s][A
Training:   4%|▍         | 13/292 [00:02<00:40,  6.92it/s][A
Training:   6%|▌         | 17/292 [00:02<00:37,  7.38it/s][A
Training:   7%|▋         | 21/292 [00:03<00:35,  7.64it/s][A
Training:   9%|▊         | 25/292 [00:03<00:34,  7.82it/s][A
Training:  10%|▉         | 29/292 [00:04<00:33,  7.81it/s][A
Training:  11%|█▏        | 33/292 [00:04<00:32,  7.90it/s][A
Training:  13%|█▎        | 37/292 [00:05<00:31,  7.99it/s][A
Training:  14%|█▍        | 41/292 [00:05<00:30,  8.10it/s][A
Training:  15%|█▌        | 45/292 [00:06<00:30,  8.11it/s][A
Training:  17%|█▋        | 49/292 [00:06<00:29,  8.16it/s][A
Training:  18%|█▊        | 53/292 [00:07<00:28,  8.27it/s][A
Training:  20%|█▉        | 57/292 [00:07<00:28,  8.25it/s][A
Training:  21%|██  

Epoch: 10/12 - Loss: 2.6534 - Accuracy: 0.9528



  0%|          | 0/37 [00:00<?, ?it/s][A
  3%|▎         | 1/37 [00:00<00:27,  1.32it/s][A
 14%|█▎        | 5/37 [00:01<00:06,  4.72it/s][A
 24%|██▍       | 9/37 [00:01<00:04,  6.27it/s][A
 35%|███▌      | 13/37 [00:02<00:03,  7.07it/s][A
 46%|████▌     | 17/37 [00:02<00:02,  7.58it/s][A
 57%|█████▋    | 21/37 [00:03<00:02,  7.81it/s][A
 68%|██████▊   | 25/37 [00:03<00:01,  8.13it/s][A
 78%|███████▊  | 29/37 [00:04<00:00,  8.25it/s][A
 89%|████████▉ | 33/37 [00:04<00:00,  8.30it/s][A
100%|██████████| 37/37 [00:04<00:00,  7.83it/s][A
Epochs:  83%|████████▎ | 10/12 [06:46<01:21, 40.71s/it]

Val Loss: 3.4311 - Val Accuracy: 0.9455



Training:   0%|          | 0/292 [00:00<?, ?it/s][A
Training:   0%|          | 1/292 [00:00<03:39,  1.33it/s][A
Training:   2%|▏         | 5/292 [00:01<01:01,  4.68it/s][A
Training:   3%|▎         | 9/292 [00:01<00:45,  6.19it/s][A
Training:   4%|▍         | 13/292 [00:02<00:40,  6.94it/s][A
Training:   6%|▌         | 17/292 [00:02<00:37,  7.39it/s][A
Training:   7%|▋         | 21/292 [00:03<00:35,  7.66it/s][A
Training:   9%|▊         | 25/292 [00:03<00:33,  7.86it/s][A
Training:  10%|▉         | 29/292 [00:04<00:32,  8.03it/s][A
Training:  11%|█▏        | 33/292 [00:04<00:32,  8.01it/s][A
Training:  13%|█▎        | 37/292 [00:05<00:31,  8.08it/s][A
Training:  14%|█▍        | 41/292 [00:05<00:30,  8.14it/s][A
Training:  15%|█▌        | 45/292 [00:06<00:30,  8.00it/s][A
Training:  17%|█▋        | 49/292 [00:06<00:30,  8.07it/s][A
Training:  18%|█▊        | 53/292 [00:07<00:29,  8.19it/s][A
Training:  20%|█▉        | 57/292 [00:07<00:29,  8.09it/s][A
Training:  21%|██  

Epoch: 11/12 - Loss: 2.6983 - Accuracy: 0.9521



  0%|          | 0/37 [00:00<?, ?it/s][A
  3%|▎         | 1/37 [00:00<00:27,  1.33it/s][A
 14%|█▎        | 5/37 [00:01<00:06,  4.67it/s][A
 24%|██▍       | 9/37 [00:01<00:04,  6.11it/s][A
 35%|███▌      | 13/37 [00:02<00:03,  6.91it/s][A
 46%|████▌     | 17/37 [00:02<00:02,  7.37it/s][A
 57%|█████▋    | 21/37 [00:03<00:02,  7.70it/s][A
 68%|██████▊   | 25/37 [00:03<00:01,  7.91it/s][A
 78%|███████▊  | 29/37 [00:04<00:00,  8.08it/s][A
 89%|████████▉ | 33/37 [00:04<00:00,  8.21it/s][A
100%|██████████| 37/37 [00:04<00:00,  7.69it/s][A
Epochs:  92%|█████████▏| 11/12 [07:26<00:40, 40.75s/it]

Val Loss: 3.5894 - Val Accuracy: 0.9394



Training:   0%|          | 0/292 [00:00<?, ?it/s][A
Training:   0%|          | 1/292 [00:00<03:50,  1.26it/s][A
Training:   2%|▏         | 5/292 [00:01<01:03,  4.53it/s][A
Training:   3%|▎         | 9/292 [00:01<00:46,  6.12it/s][A
Training:   4%|▍         | 13/292 [00:02<00:40,  6.96it/s][A
Training:   6%|▌         | 17/292 [00:02<00:36,  7.47it/s][A
Training:   7%|▋         | 21/292 [00:03<00:34,  7.80it/s][A
Training:   9%|▊         | 25/292 [00:03<00:33,  7.97it/s][A
Training:  10%|▉         | 29/292 [00:04<00:32,  8.11it/s][A
Training:  11%|█▏        | 33/292 [00:04<00:31,  8.12it/s][A
Training:  13%|█▎        | 37/292 [00:05<00:31,  8.22it/s][A
Training:  14%|█▍        | 41/292 [00:05<00:30,  8.16it/s][A
Training:  15%|█▌        | 45/292 [00:06<00:29,  8.25it/s][A
Training:  17%|█▋        | 49/292 [00:06<00:29,  8.22it/s][A
Training:  18%|█▊        | 53/292 [00:07<00:29,  8.16it/s][A
Training:  20%|█▉        | 57/292 [00:07<00:28,  8.16it/s][A
Training:  21%|██  

Epoch: 12/12 - Loss: 2.5400 - Accuracy: 0.9509



  0%|          | 0/37 [00:00<?, ?it/s][A
  3%|▎         | 1/37 [00:00<00:28,  1.28it/s][A
 14%|█▎        | 5/37 [00:01<00:06,  4.61it/s][A
 24%|██▍       | 9/37 [00:01<00:04,  6.15it/s][A
 35%|███▌      | 13/37 [00:02<00:03,  6.94it/s][A
 46%|████▌     | 17/37 [00:02<00:02,  7.42it/s][A
 57%|█████▋    | 21/37 [00:03<00:02,  7.67it/s][A
 68%|██████▊   | 25/37 [00:03<00:01,  8.00it/s][A
 78%|███████▊  | 29/37 [00:04<00:00,  8.12it/s][A
 89%|████████▉ | 33/37 [00:04<00:00,  8.24it/s][A
100%|██████████| 37/37 [00:04<00:00,  7.69it/s][A
Epochs: 100%|██████████| 12/12 [08:07<00:00, 40.61s/it]
[32m[I 2023-12-07 09:34:20,269][0m Trial 14 finished with value: 0.9382618069648743 and parameters: {'learning_rate': 0.00025553954183956626, 'weight_decay': 0.00036438686299700114, 'epsilon': 8.811775566096176e-08, 'batch_size': 48, 'epochs': 12}. Best is trial 6 with value: 0.9446031451225281.[0m


Val Loss: 3.5575 - Val Accuracy: 0.9383
Learning rate: 0.00017577182722460614
Weight decay: 0.0004870807997825659
Epsilon: 9.400445435858009e-08
Batch size: 37
Number of epochs: 14


Epochs:   0%|          | 0/14 [00:00<?, ?it/s]
Training:   0%|          | 0/378 [00:00<?, ?it/s][A
Training:   0%|          | 1/378 [00:00<04:06,  1.53it/s][A
Training:   1%|▏         | 5/378 [00:01<01:04,  5.82it/s][A
Training:   2%|▏         | 9/378 [00:01<00:47,  7.75it/s][A
Training:   3%|▎         | 13/378 [00:01<00:41,  8.71it/s][A
Training:   4%|▍         | 17/378 [00:02<00:38,  9.32it/s][A
Training:   6%|▌         | 21/378 [00:02<00:36,  9.83it/s][A
Training:   7%|▋         | 25/378 [00:02<00:34, 10.12it/s][A
Training:   8%|▊         | 29/378 [00:03<00:33, 10.33it/s][A
Training:   9%|▊         | 33/378 [00:03<00:32, 10.46it/s][A
Training:  10%|▉         | 37/378 [00:03<00:32, 10.56it/s][A
Training:  11%|█         | 41/378 [00:04<00:31, 10.60it/s][A
Training:  12%|█▏        | 45/378 [00:04<00:31, 10.60it/s][A
Training:  13%|█▎        | 49/378 [00:05<00:31, 10.54it/s][A
Training:  14%|█▍        | 53/378 [00:05<00:30, 10.77it/s][A
Training:  15%|█▌        | 57/378 [

Epoch: 1/14 - Loss: 8.2051 - Accuracy: 0.8610



  0%|          | 0/48 [00:00<?, ?it/s][A
  2%|▏         | 1/48 [00:00<00:28,  1.63it/s][A
 10%|█         | 5/48 [00:00<00:07,  5.88it/s][A
 19%|█▉        | 9/48 [00:01<00:04,  7.88it/s][A
 27%|██▋       | 13/48 [00:01<00:03,  9.02it/s][A
 35%|███▌      | 17/48 [00:02<00:03,  9.69it/s][A
 44%|████▍     | 21/48 [00:02<00:02, 10.18it/s][A
 52%|█████▏    | 25/48 [00:02<00:02, 10.42it/s][A
 60%|██████    | 29/48 [00:03<00:01, 10.60it/s][A
 69%|██████▉   | 33/48 [00:03<00:01, 10.77it/s][A
 77%|███████▋  | 37/48 [00:03<00:01, 10.87it/s][A
 85%|████████▌ | 41/48 [00:04<00:00, 10.84it/s][A
100%|██████████| 48/48 [00:04<00:00, 10.23it/s][A
Epochs:   7%|▋         | 1/14 [00:40<08:45, 40.39s/it]

Val Loss: 5.8045 - Val Accuracy: 0.9268



Training:   0%|          | 0/378 [00:00<?, ?it/s][A
Training:   0%|          | 1/378 [00:00<04:00,  1.57it/s][A
Training:   1%|▏         | 5/378 [00:01<01:05,  5.68it/s][A
Training:   2%|▏         | 9/378 [00:01<00:48,  7.60it/s][A
Training:   3%|▎         | 13/378 [00:01<00:41,  8.85it/s][A
Training:   4%|▍         | 17/378 [00:02<00:38,  9.47it/s][A
Training:   6%|▌         | 21/378 [00:02<00:36,  9.90it/s][A
Training:   7%|▋         | 25/378 [00:02<00:34, 10.10it/s][A
Training:   8%|▊         | 29/378 [00:03<00:34, 10.24it/s][A
Training:   9%|▊         | 33/378 [00:03<00:33, 10.39it/s][A
Training:  10%|▉         | 37/378 [00:03<00:32, 10.58it/s][A
Training:  11%|█         | 41/378 [00:04<00:31, 10.58it/s][A
Training:  12%|█▏        | 45/378 [00:04<00:31, 10.64it/s][A
Training:  13%|█▎        | 49/378 [00:05<00:30, 10.66it/s][A
Training:  14%|█▍        | 53/378 [00:05<00:30, 10.75it/s][A
Training:  15%|█▌        | 57/378 [00:05<00:29, 10.81it/s][A
Training:  16%|█▌  

Epoch: 2/14 - Loss: 4.6493 - Accuracy: 0.9342



  0%|          | 0/48 [00:00<?, ?it/s][A
  2%|▏         | 1/48 [00:00<00:29,  1.57it/s][A
 10%|█         | 5/48 [00:01<00:07,  5.79it/s][A
 19%|█▉        | 9/48 [00:01<00:05,  7.76it/s][A
 27%|██▋       | 13/48 [00:01<00:03,  8.80it/s][A
 35%|███▌      | 17/48 [00:02<00:03,  9.54it/s][A
 44%|████▍     | 21/48 [00:02<00:02,  9.96it/s][A
 52%|█████▏    | 25/48 [00:02<00:02, 10.28it/s][A
 60%|██████    | 29/48 [00:03<00:01, 10.53it/s][A
 69%|██████▉   | 33/48 [00:03<00:01, 10.64it/s][A
 77%|███████▋  | 37/48 [00:03<00:01, 10.75it/s][A
 85%|████████▌ | 41/48 [00:04<00:00, 10.97it/s][A
100%|██████████| 48/48 [00:04<00:00, 10.15it/s][A
Epochs:  14%|█▍        | 2/14 [01:21<08:06, 40.52s/it]

Val Loss: 4.6521 - Val Accuracy: 0.9341



Training:   0%|          | 0/378 [00:00<?, ?it/s][A
Training:   0%|          | 1/378 [00:00<04:05,  1.54it/s][A
Training:   1%|▏         | 5/378 [00:01<01:07,  5.52it/s][A
Training:   2%|▏         | 9/378 [00:01<00:49,  7.48it/s][A
Training:   3%|▎         | 13/378 [00:01<00:42,  8.63it/s][A
Training:   4%|▍         | 17/378 [00:02<00:38,  9.32it/s][A
Training:   6%|▌         | 21/378 [00:02<00:36,  9.74it/s][A
Training:   7%|▋         | 25/378 [00:02<00:35, 10.08it/s][A
Training:   8%|▊         | 29/378 [00:03<00:33, 10.45it/s][A
Training:   9%|▊         | 33/378 [00:03<00:32, 10.59it/s][A
Training:  10%|▉         | 37/378 [00:04<00:31, 10.70it/s][A
Training:  11%|█         | 41/378 [00:04<00:31, 10.82it/s][A
Training:  12%|█▏        | 45/378 [00:04<00:31, 10.74it/s][A
Training:  13%|█▎        | 49/378 [00:05<00:30, 10.78it/s][A
Training:  14%|█▍        | 53/378 [00:05<00:29, 10.87it/s][A
Training:  15%|█▌        | 57/378 [00:05<00:29, 10.80it/s][A
Training:  16%|█▌  

Epoch: 3/14 - Loss: 3.8471 - Accuracy: 0.9395



  0%|          | 0/48 [00:00<?, ?it/s][A
  2%|▏         | 1/48 [00:00<00:30,  1.54it/s][A
 10%|█         | 5/48 [00:01<00:07,  5.62it/s][A
 19%|█▉        | 9/48 [00:01<00:05,  7.75it/s][A
 27%|██▋       | 13/48 [00:01<00:03,  8.85it/s][A
 35%|███▌      | 17/48 [00:02<00:03,  9.46it/s][A
 44%|████▍     | 21/48 [00:02<00:02,  9.94it/s][A
 52%|█████▏    | 25/48 [00:02<00:02, 10.32it/s][A
 60%|██████    | 29/48 [00:03<00:01, 10.49it/s][A
 69%|██████▉   | 33/48 [00:03<00:01, 10.67it/s][A
 77%|███████▋  | 37/48 [00:03<00:01, 10.83it/s][A
 85%|████████▌ | 41/48 [00:04<00:00, 10.74it/s][A
100%|██████████| 48/48 [00:04<00:00, 10.05it/s][A
Epochs:  21%|██▏       | 3/14 [02:01<07:25, 40.51s/it]

Val Loss: 3.8202 - Val Accuracy: 0.9414



Training:   0%|          | 0/378 [00:00<?, ?it/s][A
Training:   0%|          | 1/378 [00:00<04:01,  1.56it/s][A
Training:   1%|▏         | 5/378 [00:01<01:04,  5.74it/s][A
Training:   2%|▏         | 9/378 [00:01<00:48,  7.68it/s][A
Training:   3%|▎         | 13/378 [00:01<00:41,  8.78it/s][A
Training:   4%|▍         | 17/378 [00:02<00:38,  9.37it/s][A
Training:   6%|▌         | 21/378 [00:02<00:35,  9.97it/s][A
Training:   7%|▋         | 25/378 [00:02<00:34, 10.20it/s][A
Training:   8%|▊         | 29/378 [00:03<00:33, 10.35it/s][A
Training:   9%|▊         | 33/378 [00:03<00:32, 10.48it/s][A
Training:  10%|▉         | 37/378 [00:03<00:32, 10.54it/s][A
Training:  11%|█         | 41/378 [00:04<00:31, 10.59it/s][A
Training:  12%|█▏        | 45/378 [00:04<00:31, 10.74it/s][A
Training:  13%|█▎        | 49/378 [00:05<00:30, 10.82it/s][A
Training:  14%|█▍        | 53/378 [00:05<00:29, 10.92it/s][A
Training:  15%|█▌        | 57/378 [00:05<00:29, 10.94it/s][A
Training:  16%|█▌  

Epoch: 4/14 - Loss: 3.4027 - Accuracy: 0.9434



  0%|          | 0/48 [00:00<?, ?it/s][A
  2%|▏         | 1/48 [00:00<00:28,  1.63it/s][A
 10%|█         | 5/48 [00:00<00:07,  5.96it/s][A
 19%|█▉        | 9/48 [00:01<00:04,  7.97it/s][A
 27%|██▋       | 13/48 [00:01<00:03,  8.98it/s][A
 35%|███▌      | 17/48 [00:02<00:03,  9.65it/s][A
 44%|████▍     | 21/48 [00:02<00:02, 10.02it/s][A
 52%|█████▏    | 25/48 [00:02<00:02, 10.34it/s][A
 60%|██████    | 29/48 [00:03<00:01, 10.50it/s][A
 69%|██████▉   | 33/48 [00:03<00:01, 10.66it/s][A
 77%|███████▋  | 37/48 [00:03<00:01, 10.80it/s][A
 85%|████████▌ | 41/48 [00:04<00:00, 10.88it/s][A
100%|██████████| 48/48 [00:04<00:00, 10.22it/s][A
Epochs:  29%|██▊       | 4/14 [02:41<06:44, 40.41s/it]

Val Loss: 3.5895 - Val Accuracy: 0.9369



Training:   0%|          | 0/378 [00:00<?, ?it/s][A
Training:   0%|          | 1/378 [00:00<04:06,  1.53it/s][A
Training:   1%|▏         | 5/378 [00:01<01:06,  5.57it/s][A
Training:   2%|▏         | 9/378 [00:01<00:49,  7.51it/s][A
Training:   3%|▎         | 13/378 [00:01<00:42,  8.65it/s][A
Training:   4%|▍         | 17/378 [00:02<00:38,  9.35it/s][A
Training:   6%|▌         | 21/378 [00:02<00:36,  9.78it/s][A
Training:   7%|▋         | 25/378 [00:02<00:35, 10.06it/s][A
Training:   8%|▊         | 29/378 [00:03<00:34, 10.20it/s][A
Training:   9%|▊         | 33/378 [00:03<00:33, 10.37it/s][A
Training:  10%|▉         | 37/378 [00:04<00:32, 10.43it/s][A
Training:  11%|█         | 41/378 [00:04<00:32, 10.53it/s][A
Training:  12%|█▏        | 45/378 [00:04<00:31, 10.68it/s][A
Training:  13%|█▎        | 49/378 [00:05<00:30, 10.79it/s][A
Training:  14%|█▍        | 53/378 [00:05<00:29, 10.91it/s][A
Training:  15%|█▌        | 57/378 [00:05<00:29, 10.91it/s][A
Training:  16%|█▌  

Epoch: 5/14 - Loss: 3.0421 - Accuracy: 0.9467



  0%|          | 0/48 [00:00<?, ?it/s][A
  2%|▏         | 1/48 [00:00<00:30,  1.54it/s][A
 10%|█         | 5/48 [00:01<00:07,  5.72it/s][A
 19%|█▉        | 9/48 [00:01<00:05,  7.71it/s][A
 27%|██▋       | 13/48 [00:01<00:03,  8.88it/s][A
 35%|███▌      | 17/48 [00:02<00:03,  9.53it/s][A
 44%|████▍     | 21/48 [00:02<00:02,  9.91it/s][A
 52%|█████▏    | 25/48 [00:02<00:02, 10.16it/s][A
 60%|██████    | 29/48 [00:03<00:01, 10.40it/s][A
 69%|██████▉   | 33/48 [00:03<00:01, 10.57it/s][A
 77%|███████▋  | 37/48 [00:03<00:01, 10.73it/s][A
 85%|████████▌ | 41/48 [00:04<00:00, 10.74it/s][A
100%|██████████| 48/48 [00:04<00:00, 10.06it/s][A
Epochs:  36%|███▌      | 5/14 [03:22<06:03, 40.41s/it]

Val Loss: 3.0195 - Val Accuracy: 0.9437



Training:   0%|          | 0/378 [00:00<?, ?it/s][A
Training:   0%|          | 1/378 [00:00<04:05,  1.54it/s][A
Training:   1%|▏         | 5/378 [00:01<01:05,  5.70it/s][A
Training:   2%|▏         | 9/378 [00:01<00:47,  7.76it/s][A
Training:   3%|▎         | 13/378 [00:01<00:41,  8.80it/s][A
Training:   4%|▍         | 17/378 [00:02<00:37,  9.50it/s][A
Training:   6%|▌         | 21/378 [00:02<00:36,  9.84it/s][A
Training:   7%|▋         | 25/378 [00:02<00:35, 10.02it/s][A
Training:   8%|▊         | 29/378 [00:03<00:34, 10.14it/s][A
Training:   9%|▊         | 33/378 [00:03<00:33, 10.30it/s][A
Training:  10%|▉         | 37/378 [00:04<00:32, 10.52it/s][A
Training:  11%|█         | 41/378 [00:04<00:31, 10.60it/s][A
Training:  12%|█▏        | 45/378 [00:04<00:31, 10.64it/s][A
Training:  13%|█▎        | 49/378 [00:05<00:30, 10.64it/s][A
Training:  14%|█▍        | 53/378 [00:05<00:30, 10.68it/s][A
Training:  15%|█▌        | 57/378 [00:05<00:30, 10.69it/s][A
Training:  16%|█▌  

Epoch: 6/14 - Loss: 2.9408 - Accuracy: 0.9480



  0%|          | 0/48 [00:00<?, ?it/s][A
  2%|▏         | 1/48 [00:00<00:30,  1.56it/s][A
 10%|█         | 5/48 [00:01<00:07,  5.78it/s][A
 19%|█▉        | 9/48 [00:01<00:04,  7.94it/s][A
 27%|██▋       | 13/48 [00:01<00:03,  8.99it/s][A
 35%|███▌      | 17/48 [00:02<00:03,  9.60it/s][A
 44%|████▍     | 21/48 [00:02<00:02, 10.09it/s][A
 52%|█████▏    | 25/48 [00:02<00:02, 10.37it/s][A
 60%|██████    | 29/48 [00:03<00:01, 10.56it/s][A
 69%|██████▉   | 33/48 [00:03<00:01, 10.56it/s][A
 77%|███████▋  | 37/48 [00:03<00:01, 10.63it/s][A
 85%|████████▌ | 41/48 [00:04<00:00, 10.73it/s][A
100%|██████████| 48/48 [00:04<00:00, 10.11it/s][A
Epochs:  43%|████▎     | 6/14 [04:02<05:23, 40.48s/it]

Val Loss: 3.0211 - Val Accuracy: 0.9414



Training:   0%|          | 0/378 [00:00<?, ?it/s][A
Training:   0%|          | 1/378 [00:00<04:12,  1.49it/s][A
Training:   1%|▏         | 5/378 [00:01<01:06,  5.59it/s][A
Training:   2%|▏         | 9/378 [00:01<00:48,  7.54it/s][A
Training:   3%|▎         | 13/378 [00:01<00:41,  8.72it/s][A
Training:   4%|▍         | 17/378 [00:02<00:38,  9.49it/s][A
Training:   6%|▌         | 21/378 [00:02<00:35,  9.92it/s][A
Training:   7%|▋         | 25/378 [00:02<00:34, 10.10it/s][A
Training:   8%|▊         | 29/378 [00:03<00:33, 10.32it/s][A
Training:   9%|▊         | 33/378 [00:03<00:33, 10.37it/s][A
Training:  10%|▉         | 37/378 [00:04<00:32, 10.50it/s][A
Training:  11%|█         | 41/378 [00:04<00:31, 10.65it/s][A
Training:  12%|█▏        | 45/378 [00:04<00:31, 10.70it/s][A
Training:  13%|█▎        | 49/378 [00:05<00:30, 10.71it/s][A
Training:  14%|█▍        | 53/378 [00:05<00:30, 10.77it/s][A
Training:  15%|█▌        | 57/378 [00:05<00:29, 10.74it/s][A
Training:  16%|█▌  

Epoch: 7/14 - Loss: 2.7714 - Accuracy: 0.9503



  0%|          | 0/48 [00:00<?, ?it/s][A
  2%|▏         | 1/48 [00:00<00:30,  1.54it/s][A
 10%|█         | 5/48 [00:01<00:07,  5.77it/s][A
 19%|█▉        | 9/48 [00:01<00:05,  7.77it/s][A
 27%|██▋       | 13/48 [00:01<00:03,  8.92it/s][A
 35%|███▌      | 17/48 [00:02<00:03,  9.45it/s][A
 44%|████▍     | 21/48 [00:02<00:02,  9.93it/s][A
 52%|█████▏    | 25/48 [00:02<00:02, 10.15it/s][A
 60%|██████    | 29/48 [00:03<00:01, 10.25it/s][A
 69%|██████▉   | 33/48 [00:03<00:01, 10.34it/s][A
 77%|███████▋  | 37/48 [00:04<00:01, 10.44it/s][A
 85%|████████▌ | 41/48 [00:04<00:00, 10.50it/s][A
100%|██████████| 48/48 [00:04<00:00,  9.95it/s][A
Epochs:  50%|█████     | 7/14 [04:43<04:43, 40.56s/it]

Val Loss: 3.0314 - Val Accuracy: 0.9471



Training:   0%|          | 0/378 [00:00<?, ?it/s][A
Training:   0%|          | 1/378 [00:00<04:12,  1.49it/s][A
Training:   1%|▏         | 5/378 [00:01<01:07,  5.54it/s][A
Training:   2%|▏         | 9/378 [00:01<00:49,  7.42it/s][A
Training:   3%|▎         | 13/378 [00:01<00:42,  8.66it/s][A
Training:   4%|▍         | 17/378 [00:02<00:38,  9.42it/s][A
Training:   6%|▌         | 21/378 [00:02<00:36,  9.91it/s][A
Training:   7%|▋         | 25/378 [00:02<00:34, 10.28it/s][A
Training:   8%|▊         | 29/378 [00:03<00:33, 10.35it/s][A
Training:   9%|▊         | 33/378 [00:03<00:32, 10.59it/s][A
Training:  10%|▉         | 37/378 [00:04<00:32, 10.63it/s][A
Training:  11%|█         | 41/378 [00:04<00:32, 10.50it/s][A
Training:  12%|█▏        | 45/378 [00:04<00:31, 10.57it/s][A
Training:  13%|█▎        | 49/378 [00:05<00:30, 10.68it/s][A
Training:  14%|█▍        | 53/378 [00:05<00:30, 10.64it/s][A
Training:  15%|█▌        | 57/378 [00:05<00:29, 10.74it/s][A
Training:  16%|█▌  

Epoch: 8/14 - Loss: 2.4907 - Accuracy: 0.9493



  0%|          | 0/48 [00:00<?, ?it/s][A
  2%|▏         | 1/48 [00:00<00:29,  1.62it/s][A
 10%|█         | 5/48 [00:00<00:07,  5.95it/s][A
 19%|█▉        | 9/48 [00:01<00:04,  7.89it/s][A
 27%|██▋       | 13/48 [00:01<00:03,  8.99it/s][A
 35%|███▌      | 17/48 [00:02<00:03,  9.68it/s][A
 44%|████▍     | 21/48 [00:02<00:02, 10.12it/s][A
 52%|█████▏    | 25/48 [00:02<00:02, 10.38it/s][A
 60%|██████    | 29/48 [00:03<00:01, 10.57it/s][A
 69%|██████▉   | 33/48 [00:03<00:01, 10.59it/s][A
 77%|███████▋  | 37/48 [00:03<00:01, 10.64it/s][A
 85%|████████▌ | 41/48 [00:04<00:00, 10.60it/s][A
100%|██████████| 48/48 [00:04<00:00, 10.11it/s][A
Epochs:  57%|█████▋    | 8/14 [05:24<04:03, 40.56s/it]

Val Loss: 3.3148 - Val Accuracy: 0.9431



Training:   0%|          | 0/378 [00:00<?, ?it/s][A
Training:   0%|          | 1/378 [00:00<04:08,  1.52it/s][A
Training:   1%|▏         | 5/378 [00:01<01:07,  5.55it/s][A
Training:   2%|▏         | 9/378 [00:01<00:48,  7.57it/s][A
Training:   3%|▎         | 13/378 [00:01<00:42,  8.64it/s][A
Training:   4%|▍         | 17/378 [00:02<00:38,  9.43it/s][A
Training:   6%|▌         | 21/378 [00:02<00:36,  9.87it/s][A
Training:   7%|▋         | 25/378 [00:02<00:34, 10.22it/s][A
Training:   8%|▊         | 29/378 [00:03<00:33, 10.42it/s][A
Training:   9%|▊         | 33/378 [00:03<00:33, 10.30it/s][A
Training:  10%|▉         | 37/378 [00:04<00:32, 10.40it/s][A
Training:  11%|█         | 41/378 [00:04<00:32, 10.47it/s][A
Training:  12%|█▏        | 45/378 [00:04<00:31, 10.61it/s][A
Training:  13%|█▎        | 49/378 [00:05<00:30, 10.67it/s][A
Training:  14%|█▍        | 53/378 [00:05<00:30, 10.73it/s][A
Training:  15%|█▌        | 57/378 [00:05<00:29, 10.80it/s][A
Training:  16%|█▌  

Epoch: 9/14 - Loss: 2.3834 - Accuracy: 0.9502



  0%|          | 0/48 [00:00<?, ?it/s][A
  2%|▏         | 1/48 [00:00<00:30,  1.54it/s][A
 10%|█         | 5/48 [00:01<00:07,  5.69it/s][A
 19%|█▉        | 9/48 [00:01<00:05,  7.66it/s][A
 27%|██▋       | 13/48 [00:01<00:03,  8.87it/s][A
 35%|███▌      | 17/48 [00:02<00:03,  9.62it/s][A
 44%|████▍     | 21/48 [00:02<00:02, 10.10it/s][A
 52%|█████▏    | 25/48 [00:02<00:02, 10.21it/s][A
 60%|██████    | 29/48 [00:03<00:01, 10.24it/s][A
 69%|██████▉   | 33/48 [00:03<00:01, 10.39it/s][A
 77%|███████▋  | 37/48 [00:03<00:01, 10.48it/s][A
 85%|████████▌ | 41/48 [00:04<00:00, 10.55it/s][A
100%|██████████| 48/48 [00:04<00:00,  9.96it/s][A
Epochs:  64%|██████▍   | 9/14 [06:04<03:22, 40.60s/it]

Val Loss: 2.9865 - Val Accuracy: 0.9420



Training:   0%|          | 0/378 [00:00<?, ?it/s][A
Training:   0%|          | 1/378 [00:00<04:07,  1.53it/s][A
Training:   1%|▏         | 5/378 [00:01<01:05,  5.67it/s][A
Training:   2%|▏         | 9/378 [00:01<00:48,  7.65it/s][A
Training:   3%|▎         | 13/378 [00:01<00:41,  8.77it/s][A
Training:   4%|▍         | 17/378 [00:02<00:38,  9.49it/s][A
Training:   6%|▌         | 21/378 [00:02<00:35,  9.93it/s][A
Training:   7%|▋         | 25/378 [00:02<00:34, 10.18it/s][A
Training:   8%|▊         | 29/378 [00:03<00:33, 10.36it/s][A
Training:   9%|▊         | 33/378 [00:03<00:32, 10.56it/s][A
Training:  10%|▉         | 37/378 [00:03<00:32, 10.65it/s][A
Training:  11%|█         | 41/378 [00:04<00:31, 10.75it/s][A
Training:  12%|█▏        | 45/378 [00:04<00:30, 10.75it/s][A
Training:  13%|█▎        | 49/378 [00:05<00:30, 10.67it/s][A
Training:  14%|█▍        | 53/378 [00:05<00:30, 10.73it/s][A
Training:  15%|█▌        | 57/378 [00:05<00:29, 10.86it/s][A
Training:  16%|█▌  

Epoch: 10/14 - Loss: 2.3647 - Accuracy: 0.9530



  0%|          | 0/48 [00:00<?, ?it/s][A
  2%|▏         | 1/48 [00:00<00:29,  1.58it/s][A
 10%|█         | 5/48 [00:00<00:07,  5.83it/s][A
 19%|█▉        | 9/48 [00:01<00:04,  7.87it/s][A
 27%|██▋       | 13/48 [00:01<00:03,  8.94it/s][A
 35%|███▌      | 17/48 [00:02<00:03,  9.48it/s][A
 44%|████▍     | 21/48 [00:02<00:02,  9.84it/s][A
 52%|█████▏    | 25/48 [00:02<00:02, 10.12it/s][A
 60%|██████    | 29/48 [00:03<00:01, 10.36it/s][A
 69%|██████▉   | 33/48 [00:03<00:01, 10.51it/s][A
 77%|███████▋  | 37/48 [00:03<00:01, 10.65it/s][A
 85%|████████▌ | 41/48 [00:04<00:00, 10.63it/s][A
100%|██████████| 48/48 [00:04<00:00, 10.02it/s][A
Epochs:  71%|███████▏  | 10/14 [06:45<02:42, 40.56s/it]

Val Loss: 2.9554 - Val Accuracy: 0.9465



Training:   0%|          | 0/378 [00:00<?, ?it/s][A
Training:   0%|          | 1/378 [00:00<04:16,  1.47it/s][A
Training:   1%|▏         | 5/378 [00:01<01:07,  5.49it/s][A
Training:   2%|▏         | 9/378 [00:01<00:49,  7.45it/s][A
Training:   3%|▎         | 13/378 [00:01<00:42,  8.61it/s][A
Training:   4%|▍         | 17/378 [00:02<00:38,  9.45it/s][A
Training:   6%|▌         | 21/378 [00:02<00:36,  9.80it/s][A
Training:   7%|▋         | 25/378 [00:02<00:34, 10.15it/s][A
Training:   8%|▊         | 29/378 [00:03<00:33, 10.32it/s][A
Training:   9%|▊         | 33/378 [00:03<00:32, 10.53it/s][A
Training:  10%|▉         | 37/378 [00:04<00:32, 10.61it/s][A
Training:  11%|█         | 41/378 [00:04<00:32, 10.49it/s][A
Training:  12%|█▏        | 45/378 [00:04<00:31, 10.47it/s][A
Training:  13%|█▎        | 49/378 [00:05<00:31, 10.55it/s][A
Training:  14%|█▍        | 53/378 [00:05<00:30, 10.58it/s][A
Training:  15%|█▌        | 57/378 [00:05<00:30, 10.50it/s][A
Training:  16%|█▌  

Epoch: 11/14 - Loss: 2.2122 - Accuracy: 0.9528



  0%|          | 0/48 [00:00<?, ?it/s][A
  2%|▏         | 1/48 [00:00<00:29,  1.62it/s][A
 10%|█         | 5/48 [00:00<00:07,  5.96it/s][A
 19%|█▉        | 9/48 [00:01<00:04,  7.97it/s][A
 27%|██▋       | 13/48 [00:01<00:03,  8.95it/s][A
 35%|███▌      | 17/48 [00:02<00:03,  9.51it/s][A
 44%|████▍     | 21/48 [00:02<00:02,  9.98it/s][A
 52%|█████▏    | 25/48 [00:02<00:02, 10.31it/s][A
 60%|██████    | 29/48 [00:03<00:01, 10.56it/s][A
 69%|██████▉   | 33/48 [00:03<00:01, 10.55it/s][A
 77%|███████▋  | 37/48 [00:03<00:01, 10.61it/s][A
 85%|████████▌ | 41/48 [00:04<00:00, 10.79it/s][A
100%|██████████| 48/48 [00:04<00:00, 10.14it/s][A
Epochs:  79%|███████▊  | 11/14 [07:25<02:01, 40.61s/it]

Val Loss: 2.5225 - Val Accuracy: 0.9443



Training:   0%|          | 0/378 [00:00<?, ?it/s][A
Training:   0%|          | 1/378 [00:00<04:11,  1.50it/s][A
Training:   1%|▏         | 5/378 [00:01<01:06,  5.58it/s][A
Training:   2%|▏         | 9/378 [00:01<00:48,  7.59it/s][A
Training:   3%|▎         | 13/378 [00:01<00:41,  8.77it/s][A
Training:   4%|▍         | 17/378 [00:02<00:38,  9.45it/s][A
Training:   6%|▌         | 21/378 [00:02<00:36,  9.86it/s][A
Training:   7%|▋         | 25/378 [00:02<00:34, 10.17it/s][A
Training:   8%|▊         | 29/378 [00:03<00:33, 10.40it/s][A
Training:   9%|▊         | 33/378 [00:03<00:32, 10.64it/s][A
Training:  10%|▉         | 37/378 [00:03<00:31, 10.69it/s][A
Training:  11%|█         | 41/378 [00:04<00:31, 10.72it/s][A
Training:  12%|█▏        | 45/378 [00:04<00:31, 10.69it/s][A
Training:  13%|█▎        | 49/378 [00:05<00:30, 10.64it/s][A
Training:  14%|█▍        | 53/378 [00:05<00:30, 10.64it/s][A
Training:  15%|█▌        | 57/378 [00:05<00:29, 10.74it/s][A
Training:  16%|█▌  

Epoch: 12/14 - Loss: 2.2825 - Accuracy: 0.9520



  0%|          | 0/48 [00:00<?, ?it/s][A
  2%|▏         | 1/48 [00:00<00:30,  1.54it/s][A
 10%|█         | 5/48 [00:01<00:07,  5.70it/s][A
 19%|█▉        | 9/48 [00:01<00:05,  7.67it/s][A
 27%|██▋       | 13/48 [00:01<00:03,  8.84it/s][A
 35%|███▌      | 17/48 [00:02<00:03,  9.52it/s][A
 44%|████▍     | 21/48 [00:02<00:02,  9.98it/s][A
 52%|█████▏    | 25/48 [00:02<00:02, 10.33it/s][A
 60%|██████    | 29/48 [00:03<00:01, 10.61it/s][A
 69%|██████▉   | 33/48 [00:03<00:01, 10.72it/s][A
 77%|███████▋  | 37/48 [00:03<00:01, 10.68it/s][A
 85%|████████▌ | 41/48 [00:04<00:00, 10.69it/s][A
100%|██████████| 48/48 [00:04<00:00, 10.08it/s][A
Epochs:  86%|████████▌ | 12/14 [08:06<01:21, 40.51s/it]

Val Loss: 2.9685 - Val Accuracy: 0.9420



Training:   0%|          | 0/378 [00:00<?, ?it/s][A
Training:   0%|          | 1/378 [00:00<03:54,  1.61it/s][A
Training:   1%|▏         | 5/378 [00:00<01:03,  5.83it/s][A
Training:   2%|▏         | 9/378 [00:01<00:46,  7.87it/s][A
Training:   3%|▎         | 13/378 [00:01<00:40,  9.02it/s][A
Training:   4%|▍         | 17/378 [00:02<00:37,  9.63it/s][A
Training:   6%|▌         | 21/378 [00:02<00:36,  9.91it/s][A
Training:   7%|▋         | 25/378 [00:02<00:34, 10.23it/s][A
Training:   8%|▊         | 29/378 [00:03<00:33, 10.52it/s][A
Training:   9%|▊         | 33/378 [00:03<00:32, 10.62it/s][A
Training:  10%|▉         | 37/378 [00:03<00:31, 10.76it/s][A
Training:  11%|█         | 41/378 [00:04<00:31, 10.75it/s][A
Training:  12%|█▏        | 45/378 [00:04<00:31, 10.70it/s][A
Training:  13%|█▎        | 49/378 [00:05<00:30, 10.74it/s][A
Training:  14%|█▍        | 53/378 [00:05<00:30, 10.69it/s][A
Training:  15%|█▌        | 57/378 [00:05<00:29, 10.77it/s][A
Training:  16%|█▌  

Epoch: 13/14 - Loss: 2.1420 - Accuracy: 0.9551



  0%|          | 0/48 [00:00<?, ?it/s][A
  2%|▏         | 1/48 [00:00<00:31,  1.50it/s][A
 10%|█         | 5/48 [00:01<00:07,  5.68it/s][A
 19%|█▉        | 9/48 [00:01<00:05,  7.73it/s][A
 27%|██▋       | 13/48 [00:01<00:03,  8.88it/s][A
 35%|███▌      | 17/48 [00:02<00:03,  9.51it/s][A
 44%|████▍     | 21/48 [00:02<00:02, 10.00it/s][A
 52%|█████▏    | 25/48 [00:02<00:02, 10.31it/s][A
 60%|██████    | 29/48 [00:03<00:01, 10.47it/s][A
 69%|██████▉   | 33/48 [00:03<00:01, 10.59it/s][A
 77%|███████▋  | 37/48 [00:03<00:01, 10.76it/s][A
 85%|████████▌ | 41/48 [00:04<00:00, 10.80it/s][A
100%|██████████| 48/48 [00:04<00:00, 10.09it/s][A
Epochs:  93%|█████████▎| 13/14 [08:46<00:40, 40.43s/it]

Val Loss: 2.6825 - Val Accuracy: 0.9369



Training:   0%|          | 0/378 [00:00<?, ?it/s][A
Training:   0%|          | 1/378 [00:00<04:11,  1.50it/s][A
Training:   1%|▏         | 5/378 [00:01<01:05,  5.67it/s][A
Training:   2%|▏         | 9/378 [00:01<00:47,  7.72it/s][A
Training:   3%|▎         | 13/378 [00:01<00:41,  8.79it/s][A
Training:   4%|▍         | 17/378 [00:02<00:37,  9.50it/s][A
Training:   6%|▌         | 21/378 [00:02<00:35, 10.02it/s][A
Training:   7%|▋         | 25/378 [00:02<00:34, 10.28it/s][A
Training:   8%|▊         | 29/378 [00:03<00:33, 10.50it/s][A
Training:   9%|▊         | 33/378 [00:03<00:32, 10.58it/s][A
Training:  10%|▉         | 37/378 [00:03<00:31, 10.71it/s][A
Training:  11%|█         | 41/378 [00:04<00:31, 10.80it/s][A
Training:  12%|█▏        | 45/378 [00:04<00:30, 10.86it/s][A
Training:  13%|█▎        | 49/378 [00:05<00:30, 10.82it/s][A
Training:  14%|█▍        | 53/378 [00:05<00:29, 10.84it/s][A
Training:  15%|█▌        | 57/378 [00:05<00:29, 10.88it/s][A
Training:  16%|█▌  

Epoch: 14/14 - Loss: 2.1168 - Accuracy: 0.9538



  0%|          | 0/48 [00:00<?, ?it/s][A
  2%|▏         | 1/48 [00:00<00:29,  1.61it/s][A
 10%|█         | 5/48 [00:00<00:07,  5.85it/s][A
 19%|█▉        | 9/48 [00:01<00:04,  7.86it/s][A
 27%|██▋       | 13/48 [00:01<00:03,  9.02it/s][A
 35%|███▌      | 17/48 [00:02<00:03,  9.70it/s][A
 44%|████▍     | 21/48 [00:02<00:02, 10.12it/s][A
 52%|█████▏    | 25/48 [00:02<00:02, 10.38it/s][A
 60%|██████    | 29/48 [00:03<00:01, 10.66it/s][A
 69%|██████▉   | 33/48 [00:03<00:01, 10.75it/s][A
 77%|███████▋  | 37/48 [00:03<00:01, 10.84it/s][A
 85%|████████▌ | 41/48 [00:04<00:00, 10.85it/s][A
100%|██████████| 48/48 [00:04<00:00, 10.22it/s][A
Epochs: 100%|██████████| 14/14 [09:26<00:00, 40.48s/it]
[32m[I 2023-12-07 09:43:47,277][0m Trial 15 finished with value: 0.9425676465034485 and parameters: {'learning_rate': 0.00017577182722460614, 'weight_decay': 0.0004870807997825659, 'epsilon': 9.400445435858009e-08, 'batch_size': 37, 'epochs': 14}. Best is trial 6 with value: 0.944603145122

Val Loss: 3.0170 - Val Accuracy: 0.9426
Learning rate: 0.00032038885686179594
Weight decay: 0.0012417026259634344
Epsilon: 3.2008346584419735e-08
Batch size: 59
Number of epochs: 29


Epochs:   0%|          | 0/29 [00:00<?, ?it/s]
Training:   0%|          | 0/237 [00:00<?, ?it/s][A
Training:   0%|          | 1/237 [00:00<03:34,  1.10it/s][A
Training:   2%|▏         | 5/237 [00:01<01:00,  3.85it/s][A
Training:   4%|▍         | 9/237 [00:02<00:45,  4.99it/s][A
Training:   5%|▌         | 13/237 [00:02<00:39,  5.71it/s][A
Training:   7%|▋         | 17/237 [00:03<00:36,  6.05it/s][A
Training:   9%|▉         | 21/237 [00:03<00:34,  6.29it/s][A
Training:  11%|█         | 25/237 [00:04<00:32,  6.45it/s][A
Training:  12%|█▏        | 29/237 [00:05<00:31,  6.53it/s][A
Training:  14%|█▍        | 33/237 [00:05<00:30,  6.62it/s][A
Training:  16%|█▌        | 37/237 [00:06<00:30,  6.66it/s][A
Training:  17%|█▋        | 41/237 [00:06<00:29,  6.67it/s][A
Training:  19%|█▉        | 45/237 [00:07<00:28,  6.72it/s][A
Training:  21%|██        | 49/237 [00:07<00:27,  6.79it/s][A
Training:  22%|██▏       | 53/237 [00:08<00:27,  6.75it/s][A
Training:  24%|██▍       | 57/237 [

Epoch: 1/29 - Loss: 7.7611 - Accuracy: 0.8781



  0%|          | 0/30 [00:00<?, ?it/s][A
  3%|▎         | 1/30 [00:00<00:25,  1.16it/s][A
 17%|█▋        | 5/30 [00:01<00:06,  3.94it/s][A
 30%|███       | 9/30 [00:02<00:04,  5.12it/s][A
 43%|████▎     | 13/30 [00:02<00:02,  5.71it/s][A
 57%|█████▋    | 17/30 [00:03<00:02,  6.09it/s][A
 70%|███████   | 21/30 [00:03<00:01,  6.34it/s][A
 83%|████████▎ | 25/30 [00:04<00:00,  6.55it/s][A
100%|██████████| 30/30 [00:05<00:00,  5.98it/s][A
Epochs:   3%|▎         | 1/29 [00:41<19:08, 41.02s/it]

Val Loss: 5.4597 - Val Accuracy: 0.9255



Training:   0%|          | 0/237 [00:00<?, ?it/s][A
Training:   0%|          | 1/237 [00:00<03:26,  1.14it/s][A
Training:   2%|▏         | 5/237 [00:01<00:59,  3.87it/s][A
Training:   4%|▍         | 9/237 [00:02<00:45,  4.98it/s][A
Training:   5%|▌         | 13/237 [00:02<00:40,  5.59it/s][A
Training:   7%|▋         | 17/237 [00:03<00:36,  6.02it/s][A
Training:   9%|▉         | 21/237 [00:03<00:34,  6.22it/s][A
Training:  11%|█         | 25/237 [00:04<00:33,  6.38it/s][A
Training:  12%|█▏        | 29/237 [00:05<00:32,  6.49it/s][A
Training:  14%|█▍        | 33/237 [00:05<00:30,  6.60it/s][A
Training:  16%|█▌        | 37/237 [00:06<00:30,  6.61it/s][A
Training:  17%|█▋        | 41/237 [00:06<00:29,  6.58it/s][A
Training:  19%|█▉        | 45/237 [00:07<00:28,  6.63it/s][A
Training:  21%|██        | 49/237 [00:08<00:28,  6.68it/s][A
Training:  22%|██▏       | 53/237 [00:08<00:27,  6.68it/s][A
Training:  24%|██▍       | 57/237 [00:09<00:26,  6.70it/s][A
Training:  26%|██▌ 

Epoch: 2/29 - Loss: 4.6756 - Accuracy: 0.9364



  0%|          | 0/30 [00:00<?, ?it/s][A
  3%|▎         | 1/30 [00:00<00:25,  1.14it/s][A
 17%|█▋        | 5/30 [00:01<00:06,  3.86it/s][A
 30%|███       | 9/30 [00:02<00:04,  5.06it/s][A
 43%|████▎     | 13/30 [00:02<00:02,  5.74it/s][A
 57%|█████▋    | 17/30 [00:03<00:02,  6.15it/s][A
 70%|███████   | 21/30 [00:03<00:01,  6.40it/s][A
 83%|████████▎ | 25/30 [00:04<00:00,  6.59it/s][A
100%|██████████| 30/30 [00:05<00:00,  5.98it/s][A
Epochs:   7%|▋         | 2/29 [01:21<18:23, 40.85s/it]

Val Loss: 4.7412 - Val Accuracy: 0.9360



Training:   0%|          | 0/237 [00:00<?, ?it/s][A
Training:   0%|          | 1/237 [00:00<03:36,  1.09it/s][A
Training:   2%|▏         | 5/237 [00:01<01:01,  3.78it/s][A
Training:   4%|▍         | 9/237 [00:02<00:45,  4.99it/s][A
Training:   5%|▌         | 13/237 [00:02<00:39,  5.61it/s][A
Training:   7%|▋         | 17/237 [00:03<00:36,  6.05it/s][A
Training:   9%|▉         | 21/237 [00:03<00:34,  6.32it/s][A
Training:  11%|█         | 25/237 [00:04<00:32,  6.48it/s][A
Training:  12%|█▏        | 29/237 [00:05<00:31,  6.58it/s][A
Training:  14%|█▍        | 33/237 [00:05<00:30,  6.68it/s][A
Training:  16%|█▌        | 37/237 [00:06<00:29,  6.76it/s][A
Training:  17%|█▋        | 41/237 [00:06<00:29,  6.74it/s][A
Training:  19%|█▉        | 45/237 [00:07<00:28,  6.68it/s][A
Training:  21%|██        | 49/237 [00:08<00:28,  6.70it/s][A
Training:  22%|██▏       | 53/237 [00:08<00:27,  6.74it/s][A
Training:  24%|██▍       | 57/237 [00:09<00:26,  6.81it/s][A
Training:  26%|██▌ 

Epoch: 3/29 - Loss: 3.9000 - Accuracy: 0.9444



  0%|          | 0/30 [00:00<?, ?it/s][A
  3%|▎         | 1/30 [00:00<00:25,  1.14it/s][A
 17%|█▋        | 5/30 [00:01<00:06,  3.91it/s][A
 30%|███       | 9/30 [00:02<00:04,  5.08it/s][A
 43%|████▎     | 13/30 [00:02<00:02,  5.73it/s][A
 57%|█████▋    | 17/30 [00:03<00:02,  6.17it/s][A
 70%|███████   | 21/30 [00:03<00:01,  6.38it/s][A
 83%|████████▎ | 25/30 [00:04<00:00,  6.51it/s][A
100%|██████████| 30/30 [00:05<00:00,  5.95it/s][A
Epochs:  10%|█         | 3/29 [02:02<17:39, 40.75s/it]

Val Loss: 4.5583 - Val Accuracy: 0.9355



Training:   0%|          | 0/237 [00:00<?, ?it/s][A
Training:   0%|          | 1/237 [00:00<03:24,  1.16it/s][A
Training:   2%|▏         | 5/237 [00:01<00:59,  3.91it/s][A
Training:   4%|▍         | 9/237 [00:02<00:45,  5.05it/s][A
Training:   5%|▌         | 13/237 [00:02<00:39,  5.70it/s][A
Training:   7%|▋         | 17/237 [00:03<00:35,  6.12it/s][A
Training:   9%|▉         | 21/237 [00:03<00:34,  6.34it/s][A
Training:  11%|█         | 25/237 [00:04<00:32,  6.46it/s][A
Training:  12%|█▏        | 29/237 [00:04<00:31,  6.60it/s][A
Training:  14%|█▍        | 33/237 [00:05<00:30,  6.67it/s][A
Training:  16%|█▌        | 37/237 [00:06<00:29,  6.68it/s][A
Training:  17%|█▋        | 41/237 [00:06<00:29,  6.73it/s][A
Training:  19%|█▉        | 45/237 [00:07<00:28,  6.72it/s][A
Training:  21%|██        | 49/237 [00:07<00:28,  6.69it/s][A
Training:  22%|██▏       | 53/237 [00:08<00:27,  6.70it/s][A
Training:  24%|██▍       | 57/237 [00:09<00:26,  6.73it/s][A
Training:  26%|██▌ 

Epoch: 4/29 - Loss: 3.8155 - Accuracy: 0.9433



  0%|          | 0/30 [00:00<?, ?it/s][A
  3%|▎         | 1/30 [00:00<00:25,  1.13it/s][A
 17%|█▋        | 5/30 [00:01<00:06,  3.93it/s][A
 30%|███       | 9/30 [00:02<00:04,  5.15it/s][A
 43%|████▎     | 13/30 [00:02<00:02,  5.81it/s][A
 57%|█████▋    | 17/30 [00:03<00:02,  6.20it/s][A
 70%|███████   | 21/30 [00:03<00:01,  6.42it/s][A
 83%|████████▎ | 25/30 [00:04<00:00,  6.61it/s][A
100%|██████████| 30/30 [00:04<00:00,  6.01it/s][A
Epochs:  14%|█▍        | 4/29 [02:42<16:57, 40.69s/it]

Val Loss: 4.1530 - Val Accuracy: 0.9360



Training:   0%|          | 0/237 [00:00<?, ?it/s][A
Training:   0%|          | 1/237 [00:00<03:21,  1.17it/s][A
Training:   2%|▏         | 5/237 [00:01<00:57,  4.01it/s][A
Training:   4%|▍         | 9/237 [00:02<00:43,  5.21it/s][A
Training:   5%|▌         | 13/237 [00:02<00:38,  5.82it/s][A
Training:   7%|▋         | 17/237 [00:03<00:35,  6.19it/s][A
Training:   9%|▉         | 21/237 [00:03<00:33,  6.41it/s][A
Training:  11%|█         | 25/237 [00:04<00:32,  6.61it/s][A
Training:  12%|█▏        | 29/237 [00:04<00:31,  6.68it/s][A
Training:  14%|█▍        | 33/237 [00:05<00:30,  6.68it/s][A
Training:  16%|█▌        | 37/237 [00:06<00:29,  6.73it/s][A
Training:  17%|█▋        | 41/237 [00:06<00:28,  6.79it/s][A
Training:  19%|█▉        | 45/237 [00:07<00:28,  6.80it/s][A
Training:  21%|██        | 49/237 [00:07<00:27,  6.79it/s][A
Training:  22%|██▏       | 53/237 [00:08<00:27,  6.81it/s][A
Training:  24%|██▍       | 57/237 [00:09<00:26,  6.79it/s][A
Training:  26%|██▌ 

Epoch: 5/29 - Loss: 3.4034 - Accuracy: 0.9458



  0%|          | 0/30 [00:00<?, ?it/s][A
  3%|▎         | 1/30 [00:00<00:25,  1.16it/s][A
 17%|█▋        | 5/30 [00:01<00:06,  3.96it/s][A
 30%|███       | 9/30 [00:02<00:04,  5.23it/s][A
 43%|████▎     | 13/30 [00:02<00:02,  5.82it/s][A
 57%|█████▋    | 17/30 [00:03<00:02,  6.17it/s][A
 70%|███████   | 21/30 [00:03<00:01,  6.43it/s][A
 83%|████████▎ | 25/30 [00:04<00:00,  6.54it/s][A
100%|██████████| 30/30 [00:04<00:00,  6.03it/s][A
Epochs:  17%|█▋        | 5/29 [03:23<16:14, 40.61s/it]

Val Loss: 3.4827 - Val Accuracy: 0.9396



Training:   0%|          | 0/237 [00:00<?, ?it/s][A
Training:   0%|          | 1/237 [00:00<03:21,  1.17it/s][A
Training:   2%|▏         | 5/237 [00:01<00:58,  3.96it/s][A
Training:   4%|▍         | 9/237 [00:02<00:44,  5.16it/s][A
Training:   5%|▌         | 13/237 [00:02<00:38,  5.77it/s][A
Training:   7%|▋         | 17/237 [00:03<00:35,  6.17it/s][A
Training:   9%|▉         | 21/237 [00:03<00:33,  6.41it/s][A
Training:  11%|█         | 25/237 [00:04<00:32,  6.51it/s][A
Training:  12%|█▏        | 29/237 [00:04<00:31,  6.60it/s][A
Training:  14%|█▍        | 33/237 [00:05<00:30,  6.68it/s][A
Training:  16%|█▌        | 37/237 [00:06<00:29,  6.74it/s][A
Training:  17%|█▋        | 41/237 [00:06<00:28,  6.76it/s][A
Training:  19%|█▉        | 45/237 [00:07<00:28,  6.79it/s][A
Training:  21%|██        | 49/237 [00:07<00:27,  6.77it/s][A
Training:  22%|██▏       | 53/237 [00:08<00:26,  6.83it/s][A
Training:  24%|██▍       | 57/237 [00:09<00:26,  6.80it/s][A
Training:  26%|██▌ 

Epoch: 6/29 - Loss: 3.2233 - Accuracy: 0.9484



  0%|          | 0/30 [00:00<?, ?it/s][A
  3%|▎         | 1/30 [00:00<00:25,  1.13it/s][A
 17%|█▋        | 5/30 [00:01<00:06,  3.96it/s][A
 30%|███       | 9/30 [00:02<00:04,  5.17it/s][A
 43%|████▎     | 13/30 [00:02<00:02,  5.81it/s][A
 57%|█████▋    | 17/30 [00:03<00:02,  6.17it/s][A
 70%|███████   | 21/30 [00:03<00:01,  6.41it/s][A
 83%|████████▎ | 25/30 [00:04<00:00,  6.55it/s][A
100%|██████████| 30/30 [00:05<00:00,  5.98it/s][A
Epochs:  21%|██        | 6/29 [04:03<15:32, 40.56s/it]

Val Loss: 3.9000 - Val Accuracy: 0.9385



Training:   0%|          | 0/237 [00:00<?, ?it/s][A
Training:   0%|          | 1/237 [00:00<03:22,  1.16it/s][A
Training:   2%|▏         | 5/237 [00:01<00:58,  3.95it/s][A
Training:   4%|▍         | 9/237 [00:02<00:43,  5.19it/s][A
Training:   5%|▌         | 13/237 [00:02<00:38,  5.79it/s][A
Training:   7%|▋         | 17/237 [00:03<00:36,  6.03it/s][A
Training:   9%|▉         | 21/237 [00:03<00:34,  6.30it/s][A
Training:  11%|█         | 25/237 [00:04<00:32,  6.46it/s][A
Training:  12%|█▏        | 29/237 [00:04<00:31,  6.55it/s][A
Training:  14%|█▍        | 33/237 [00:05<00:30,  6.65it/s][A
Training:  16%|█▌        | 37/237 [00:06<00:29,  6.73it/s][A
Training:  17%|█▋        | 41/237 [00:06<00:28,  6.76it/s][A
Training:  19%|█▉        | 45/237 [00:07<00:28,  6.79it/s][A
Training:  21%|██        | 49/237 [00:07<00:27,  6.76it/s][A
Training:  22%|██▏       | 53/237 [00:08<00:27,  6.80it/s][A
Training:  24%|██▍       | 57/237 [00:09<00:26,  6.79it/s][A
Training:  26%|██▌ 

Epoch: 7/29 - Loss: 3.0422 - Accuracy: 0.9494



  0%|          | 0/30 [00:00<?, ?it/s][A
  3%|▎         | 1/30 [00:00<00:24,  1.18it/s][A
 17%|█▋        | 5/30 [00:01<00:06,  4.03it/s][A
 30%|███       | 9/30 [00:01<00:03,  5.29it/s][A
 43%|████▎     | 13/30 [00:02<00:02,  5.93it/s][A
 57%|█████▋    | 17/30 [00:03<00:02,  6.25it/s][A
 70%|███████   | 21/30 [00:03<00:01,  6.47it/s][A
 83%|████████▎ | 25/30 [00:04<00:00,  6.59it/s][A
100%|██████████| 30/30 [00:04<00:00,  6.09it/s][A
Epochs:  24%|██▍       | 7/29 [04:44<14:52, 40.55s/it]

Val Loss: 3.5370 - Val Accuracy: 0.9406



Training:   0%|          | 0/237 [00:00<?, ?it/s][A
Training:   0%|          | 1/237 [00:00<03:26,  1.14it/s][A
Training:   2%|▏         | 5/237 [00:01<00:59,  3.92it/s][A
Training:   4%|▍         | 9/237 [00:02<00:44,  5.10it/s][A
Training:   5%|▌         | 13/237 [00:02<00:39,  5.72it/s][A
Training:   7%|▋         | 17/237 [00:03<00:35,  6.12it/s][A
Training:   9%|▉         | 21/237 [00:03<00:34,  6.32it/s][A
Training:  11%|█         | 25/237 [00:04<00:32,  6.53it/s][A
Training:  12%|█▏        | 29/237 [00:04<00:31,  6.61it/s][A
Training:  14%|█▍        | 33/237 [00:05<00:30,  6.75it/s][A
Training:  16%|█▌        | 37/237 [00:06<00:29,  6.77it/s][A
Training:  17%|█▋        | 41/237 [00:06<00:29,  6.68it/s][A
Training:  19%|█▉        | 45/237 [00:07<00:29,  6.57it/s][A
Training:  21%|██        | 49/237 [00:08<00:28,  6.51it/s][A
Training:  22%|██▏       | 53/237 [00:08<00:28,  6.50it/s][A
Training:  24%|██▍       | 57/237 [00:09<00:27,  6.49it/s][A
Training:  26%|██▌ 

Epoch: 8/29 - Loss: 3.1113 - Accuracy: 0.9508



  0%|          | 0/30 [00:00<?, ?it/s][A
  3%|▎         | 1/30 [00:00<00:24,  1.17it/s][A
 17%|█▋        | 5/30 [00:01<00:06,  3.99it/s][A
 30%|███       | 9/30 [00:01<00:03,  5.25it/s][A
 43%|████▎     | 13/30 [00:02<00:02,  5.91it/s][A
 57%|█████▋    | 17/30 [00:03<00:02,  6.30it/s][A
 70%|███████   | 21/30 [00:03<00:01,  6.48it/s][A
 83%|████████▎ | 25/30 [00:04<00:00,  6.61it/s][A
100%|██████████| 30/30 [00:04<00:00,  6.06it/s][A
Epochs:  28%|██▊       | 8/29 [05:25<14:13, 40.63s/it]

Val Loss: 3.7938 - Val Accuracy: 0.9373



Training:   0%|          | 0/237 [00:00<?, ?it/s][A
Training:   0%|          | 1/237 [00:00<03:21,  1.17it/s][A
Training:   2%|▏         | 5/237 [00:01<00:58,  3.94it/s][A
Training:   4%|▍         | 9/237 [00:02<00:44,  5.15it/s][A
Training:   5%|▌         | 13/237 [00:02<00:38,  5.77it/s][A
Training:   7%|▋         | 17/237 [00:03<00:35,  6.19it/s][A
Training:   9%|▉         | 21/237 [00:03<00:34,  6.30it/s][A
Training:  11%|█         | 25/237 [00:04<00:33,  6.38it/s][A
Training:  12%|█▏        | 29/237 [00:05<00:32,  6.49it/s][A
Training:  14%|█▍        | 33/237 [00:05<00:31,  6.56it/s][A
Training:  16%|█▌        | 37/237 [00:06<00:30,  6.62it/s][A
Training:  17%|█▋        | 41/237 [00:06<00:29,  6.64it/s][A
Training:  19%|█▉        | 45/237 [00:07<00:28,  6.71it/s][A
Training:  20%|██        | 48/237 [00:07<00:22,  8.28it/s][A
Training:  21%|██        | 50/237 [00:08<00:28,  6.60it/s][A
Training:  22%|██▏       | 53/237 [00:08<00:29,  6.32it/s][A
Training:  24%|██▍ 

Epoch: 9/29 - Loss: 2.8651 - Accuracy: 0.9517



  0%|          | 0/30 [00:00<?, ?it/s][A
  3%|▎         | 1/30 [00:00<00:26,  1.11it/s][A
 17%|█▋        | 5/30 [00:01<00:06,  3.91it/s][A
 30%|███       | 9/30 [00:02<00:04,  5.11it/s][A
 43%|████▎     | 13/30 [00:02<00:02,  5.84it/s][A
 57%|█████▋    | 17/30 [00:03<00:02,  6.18it/s][A
 70%|███████   | 21/30 [00:03<00:01,  6.40it/s][A
 83%|████████▎ | 25/30 [00:04<00:00,  6.57it/s][A
100%|██████████| 30/30 [00:05<00:00,  5.98it/s][A
Epochs:  31%|███       | 9/29 [06:05<13:32, 40.65s/it]

Val Loss: 3.9735 - Val Accuracy: 0.9368



Training:   0%|          | 0/237 [00:00<?, ?it/s][A
Training:   0%|          | 1/237 [00:00<03:35,  1.09it/s][A
Training:   2%|▏         | 5/237 [00:01<01:00,  3.85it/s][A
Training:   4%|▍         | 9/237 [00:02<00:45,  5.02it/s][A
Training:   5%|▌         | 13/237 [00:02<00:39,  5.64it/s][A
Training:   7%|▋         | 17/237 [00:03<00:36,  6.04it/s][A
Training:   9%|▉         | 21/237 [00:03<00:34,  6.26it/s][A
Training:  11%|█         | 25/237 [00:04<00:33,  6.33it/s][A
Training:  12%|█▏        | 29/237 [00:05<00:32,  6.47it/s][A
Training:  14%|█▍        | 33/237 [00:05<00:31,  6.53it/s][A
Training:  16%|█▌        | 37/237 [00:06<00:30,  6.60it/s][A
Training:  17%|█▋        | 41/237 [00:06<00:29,  6.68it/s][A
Training:  19%|█▉        | 45/237 [00:07<00:28,  6.75it/s][A
Training:  21%|██        | 49/237 [00:08<00:27,  6.76it/s][A
Training:  22%|██▏       | 53/237 [00:08<00:27,  6.78it/s][A
Training:  24%|██▍       | 57/237 [00:09<00:26,  6.79it/s][A
Training:  26%|██▌ 

Epoch: 10/29 - Loss: 2.9782 - Accuracy: 0.9511



  0%|          | 0/30 [00:00<?, ?it/s][A
  3%|▎         | 1/30 [00:00<00:24,  1.17it/s][A
 17%|█▋        | 5/30 [00:01<00:06,  4.00it/s][A
 30%|███       | 9/30 [00:02<00:04,  5.23it/s][A
 43%|████▎     | 13/30 [00:02<00:02,  5.82it/s][A
 57%|█████▋    | 17/30 [00:03<00:02,  6.23it/s][A
 70%|███████   | 21/30 [00:03<00:01,  6.50it/s][A
 83%|████████▎ | 25/30 [00:04<00:00,  6.62it/s][A
100%|██████████| 30/30 [00:04<00:00,  6.07it/s][A
Epochs:  34%|███▍      | 10/29 [06:46<12:51, 40.62s/it]

Val Loss: 3.1719 - Val Accuracy: 0.9423



Training:   0%|          | 0/237 [00:00<?, ?it/s][A
Training:   0%|          | 1/237 [00:00<03:25,  1.15it/s][A
Training:   2%|▏         | 5/237 [00:01<00:59,  3.92it/s][A
Training:   4%|▍         | 9/237 [00:02<00:44,  5.16it/s][A
Training:   5%|▌         | 13/237 [00:02<00:38,  5.76it/s][A
Training:   7%|▋         | 17/237 [00:03<00:35,  6.14it/s][A
Training:   9%|▉         | 21/237 [00:03<00:33,  6.42it/s][A
Training:  11%|█         | 25/237 [00:04<00:32,  6.44it/s][A
Training:  12%|█▏        | 29/237 [00:04<00:31,  6.60it/s][A
Training:  14%|█▍        | 33/237 [00:05<00:30,  6.62it/s][A
Training:  16%|█▌        | 37/237 [00:06<00:30,  6.64it/s][A
Training:  17%|█▋        | 41/237 [00:06<00:29,  6.71it/s][A
Training:  19%|█▉        | 45/237 [00:07<00:28,  6.70it/s][A
Training:  21%|██        | 49/237 [00:07<00:28,  6.61it/s][A
Training:  22%|██▏       | 53/237 [00:08<00:27,  6.68it/s][A
Training:  24%|██▍       | 57/237 [00:09<00:26,  6.69it/s][A
Training:  26%|██▌ 

Epoch: 11/29 - Loss: 2.7980 - Accuracy: 0.9503



  0%|          | 0/30 [00:00<?, ?it/s][A
  3%|▎         | 1/30 [00:00<00:25,  1.12it/s][A
 17%|█▋        | 5/30 [00:01<00:06,  3.90it/s][A
 30%|███       | 9/30 [00:02<00:04,  5.04it/s][A
 43%|████▎     | 13/30 [00:02<00:02,  5.72it/s][A
 57%|█████▋    | 17/30 [00:03<00:02,  6.06it/s][A
 70%|███████   | 21/30 [00:03<00:01,  6.27it/s][A
 83%|████████▎ | 25/30 [00:04<00:00,  6.40it/s][A
100%|██████████| 30/30 [00:05<00:00,  5.89it/s][A
Epochs:  38%|███▊      | 11/29 [07:27<12:15, 40.89s/it]

Val Loss: 3.0676 - Val Accuracy: 0.9407



Training:   0%|          | 0/237 [00:00<?, ?it/s][A
Training:   0%|          | 1/237 [00:00<03:26,  1.14it/s][A
Training:   2%|▏         | 5/237 [00:01<00:59,  3.91it/s][A
Training:   4%|▍         | 9/237 [00:02<00:44,  5.08it/s][A
Training:   5%|▌         | 13/237 [00:02<00:39,  5.74it/s][A
Training:   7%|▋         | 17/237 [00:03<00:35,  6.13it/s][A
Training:   9%|▉         | 21/237 [00:03<00:34,  6.27it/s][A
Training:  11%|█         | 25/237 [00:04<00:32,  6.48it/s][A
Training:  12%|█▏        | 29/237 [00:05<00:31,  6.55it/s][A
Training:  14%|█▎        | 32/237 [00:05<00:25,  8.12it/s][A
Training:  14%|█▍        | 34/237 [00:05<00:31,  6.52it/s][A
Training:  16%|█▌        | 37/237 [00:06<00:32,  6.22it/s][A
Training:  17%|█▋        | 41/237 [00:06<00:30,  6.41it/s][A
Training:  19%|█▉        | 45/237 [00:07<00:29,  6.49it/s][A
Training:  21%|██        | 49/237 [00:07<00:28,  6.61it/s][A
Training:  22%|██▏       | 52/237 [00:08<00:22,  8.27it/s][A
Training:  23%|██▎ 

Epoch: 12/29 - Loss: 2.7866 - Accuracy: 0.9529



  0%|          | 0/30 [00:00<?, ?it/s][A
  3%|▎         | 1/30 [00:00<00:25,  1.14it/s][A
 17%|█▋        | 5/30 [00:01<00:06,  3.79it/s][A
 30%|███       | 9/30 [00:02<00:04,  4.97it/s][A
 43%|████▎     | 13/30 [00:02<00:03,  5.60it/s][A
 57%|█████▋    | 17/30 [00:03<00:02,  5.99it/s][A
 70%|███████   | 21/30 [00:03<00:01,  6.19it/s][A
 83%|████████▎ | 25/30 [00:04<00:00,  6.32it/s][A
100%|██████████| 30/30 [00:05<00:00,  5.80it/s][A
Epochs:  41%|████▏     | 12/29 [08:09<11:36, 40.99s/it]

Val Loss: 3.5341 - Val Accuracy: 0.9428



Training:   0%|          | 0/237 [00:00<?, ?it/s][A
Training:   0%|          | 1/237 [00:00<03:26,  1.14it/s][A
Training:   2%|▏         | 5/237 [00:01<00:59,  3.88it/s][A
Training:   4%|▍         | 9/237 [00:02<00:44,  5.11it/s][A
Training:   5%|▌         | 13/237 [00:02<00:39,  5.66it/s][A
Training:   7%|▋         | 17/237 [00:03<00:35,  6.12it/s][A
Training:   9%|▉         | 21/237 [00:03<00:34,  6.28it/s][A
Training:  11%|█         | 25/237 [00:04<00:33,  6.40it/s][A
Training:  12%|█▏        | 29/237 [00:05<00:32,  6.47it/s][A
Training:  14%|█▍        | 33/237 [00:05<00:31,  6.57it/s][A
Training:  16%|█▌        | 37/237 [00:06<00:30,  6.62it/s][A
Training:  17%|█▋        | 41/237 [00:06<00:29,  6.61it/s][A
Training:  19%|█▉        | 45/237 [00:07<00:28,  6.71it/s][A
Training:  21%|██        | 49/237 [00:08<00:28,  6.69it/s][A
Training:  22%|██▏       | 53/237 [00:08<00:27,  6.70it/s][A
Training:  24%|██▍       | 57/237 [00:09<00:26,  6.67it/s][A
Training:  26%|██▌ 

Epoch: 13/29 - Loss: 2.7262 - Accuracy: 0.9519



  0%|          | 0/30 [00:00<?, ?it/s][A
  3%|▎         | 1/30 [00:00<00:25,  1.15it/s][A
 17%|█▋        | 5/30 [00:01<00:06,  3.89it/s][A
 30%|███       | 9/30 [00:02<00:04,  5.05it/s][A
 43%|████▎     | 13/30 [00:02<00:03,  5.65it/s][A
 57%|█████▋    | 17/30 [00:03<00:02,  6.00it/s][A
 70%|███████   | 21/30 [00:03<00:01,  6.22it/s][A
 83%|████████▎ | 25/30 [00:04<00:00,  6.37it/s][A
100%|██████████| 30/30 [00:05<00:00,  5.86it/s][A
Epochs:  45%|████▍     | 13/29 [08:50<10:56, 41.06s/it]

Val Loss: 3.4186 - Val Accuracy: 0.9401



Training:   0%|          | 0/237 [00:00<?, ?it/s][A
Training:   0%|          | 1/237 [00:00<03:36,  1.09it/s][A
Training:   2%|▏         | 5/237 [00:01<01:01,  3.79it/s][A
Training:   4%|▍         | 9/237 [00:02<00:45,  5.02it/s][A
Training:   5%|▌         | 13/237 [00:02<00:39,  5.60it/s][A
Training:   7%|▋         | 17/237 [00:03<00:37,  5.90it/s][A
Training:   9%|▉         | 21/237 [00:03<00:34,  6.23it/s][A
Training:  11%|█         | 25/237 [00:04<00:33,  6.36it/s][A
Training:  12%|█▏        | 29/237 [00:05<00:32,  6.48it/s][A
Training:  14%|█▍        | 33/237 [00:05<00:31,  6.51it/s][A
Training:  16%|█▌        | 37/237 [00:06<00:30,  6.59it/s][A
Training:  17%|█▋        | 41/237 [00:06<00:29,  6.61it/s][A
Training:  19%|█▉        | 45/237 [00:07<00:29,  6.60it/s][A
Training:  21%|██        | 49/237 [00:08<00:28,  6.62it/s][A
Training:  22%|██▏       | 53/237 [00:08<00:27,  6.62it/s][A
Training:  24%|██▍       | 57/237 [00:09<00:27,  6.63it/s][A
Training:  26%|██▌ 

Epoch: 14/29 - Loss: 2.7746 - Accuracy: 0.9517



  0%|          | 0/30 [00:00<?, ?it/s][A
  3%|▎         | 1/30 [00:00<00:25,  1.12it/s][A
 17%|█▋        | 5/30 [00:01<00:06,  3.89it/s][A
 30%|███       | 9/30 [00:02<00:04,  5.09it/s][A
 43%|████▎     | 13/30 [00:02<00:02,  5.71it/s][A
 57%|█████▋    | 17/30 [00:03<00:02,  6.01it/s][A
 70%|███████   | 21/30 [00:03<00:01,  6.27it/s][A
 83%|████████▎ | 25/30 [00:04<00:00,  6.44it/s][A
100%|██████████| 30/30 [00:05<00:00,  5.88it/s][A
Epochs:  48%|████▊     | 14/29 [09:31<10:16, 41.12s/it]

Val Loss: 3.5627 - Val Accuracy: 0.9396



Training:   0%|          | 0/237 [00:00<?, ?it/s][A
Training:   0%|          | 1/237 [00:00<03:25,  1.15it/s][A
Training:   2%|▏         | 5/237 [00:01<00:59,  3.91it/s][A
Training:   4%|▍         | 9/237 [00:02<00:45,  4.98it/s][A
Training:   5%|▌         | 13/237 [00:02<00:40,  5.56it/s][A
Training:   7%|▋         | 17/237 [00:03<00:36,  5.99it/s][A
Training:   9%|▉         | 21/237 [00:03<00:34,  6.26it/s][A
Training:  11%|█         | 25/237 [00:04<00:33,  6.32it/s][A
Training:  12%|█▏        | 29/237 [00:05<00:32,  6.43it/s][A
Training:  14%|█▍        | 33/237 [00:05<00:31,  6.52it/s][A
Training:  16%|█▌        | 37/237 [00:06<00:30,  6.56it/s][A
Training:  17%|█▋        | 41/237 [00:06<00:29,  6.57it/s][A
Training:  19%|█▉        | 45/237 [00:07<00:28,  6.63it/s][A
Training:  21%|██        | 49/237 [00:08<00:28,  6.61it/s][A
Training:  22%|██▏       | 53/237 [00:08<00:27,  6.63it/s][A
Training:  24%|██▍       | 57/237 [00:09<00:27,  6.62it/s][A
Training:  26%|██▌ 

Epoch: 15/29 - Loss: 2.6371 - Accuracy: 0.9539



  0%|          | 0/30 [00:00<?, ?it/s][A
  3%|▎         | 1/30 [00:00<00:25,  1.13it/s][A
 17%|█▋        | 5/30 [00:01<00:06,  3.94it/s][A
 30%|███       | 9/30 [00:02<00:04,  5.10it/s][A
 43%|████▎     | 13/30 [00:02<00:02,  5.72it/s][A
 57%|█████▋    | 17/30 [00:03<00:02,  6.03it/s][A
 70%|███████   | 21/30 [00:03<00:01,  6.28it/s][A
 83%|████████▎ | 25/30 [00:04<00:00,  6.48it/s][A
100%|██████████| 30/30 [00:05<00:00,  5.92it/s][A
Epochs:  52%|█████▏    | 15/29 [10:12<09:36, 41.15s/it]

Val Loss: 3.7570 - Val Accuracy: 0.9401



Training:   0%|          | 0/237 [00:00<?, ?it/s][A
Training:   0%|          | 1/237 [00:00<03:35,  1.10it/s][A
Training:   2%|▏         | 5/237 [00:01<01:00,  3.82it/s][A
Training:   4%|▍         | 9/237 [00:02<00:45,  5.00it/s][A
Training:   5%|▌         | 13/237 [00:02<00:39,  5.61it/s][A
Training:   7%|▋         | 17/237 [00:03<00:36,  6.05it/s][A
Training:   9%|▉         | 21/237 [00:03<00:34,  6.26it/s][A
Training:  11%|█         | 25/237 [00:04<00:33,  6.41it/s][A
Training:  12%|█▏        | 29/237 [00:05<00:32,  6.46it/s][A
Training:  14%|█▍        | 33/237 [00:05<00:31,  6.54it/s][A
Training:  16%|█▌        | 37/237 [00:06<00:30,  6.56it/s][A
Training:  17%|█▋        | 41/237 [00:06<00:29,  6.62it/s][A
Training:  19%|█▉        | 45/237 [00:07<00:28,  6.66it/s][A
Training:  21%|██        | 49/237 [00:08<00:28,  6.68it/s][A
Training:  22%|██▏       | 53/237 [00:08<00:27,  6.67it/s][A
Training:  24%|██▍       | 57/237 [00:09<00:27,  6.65it/s][A
Training:  26%|██▌ 

Epoch: 16/29 - Loss: 2.6845 - Accuracy: 0.9544



  0%|          | 0/30 [00:00<?, ?it/s][A
  3%|▎         | 1/30 [00:00<00:26,  1.09it/s][A
 17%|█▋        | 5/30 [00:01<00:06,  3.76it/s][A
 30%|███       | 9/30 [00:02<00:04,  4.92it/s][A
 43%|████▎     | 13/30 [00:02<00:03,  5.55it/s][A
 57%|█████▋    | 17/30 [00:03<00:02,  5.89it/s][A
 70%|███████   | 21/30 [00:03<00:01,  6.13it/s][A
 83%|████████▎ | 25/30 [00:04<00:00,  6.29it/s][A
100%|██████████| 30/30 [00:05<00:00,  5.75it/s][A
Epochs:  55%|█████▌    | 16/29 [10:54<08:56, 41.25s/it]

Val Loss: 3.4929 - Val Accuracy: 0.9451



Training:   0%|          | 0/237 [00:00<?, ?it/s][A
Training:   0%|          | 1/237 [00:00<03:26,  1.14it/s][A
Training:   2%|▏         | 5/237 [00:01<01:00,  3.84it/s][A
Training:   4%|▍         | 9/237 [00:02<00:46,  4.91it/s][A
Training:   5%|▌         | 13/237 [00:02<00:40,  5.51it/s][A
Training:   7%|▋         | 17/237 [00:03<00:37,  5.90it/s][A
Training:   9%|▉         | 21/237 [00:03<00:35,  6.10it/s][A
Training:  11%|█         | 25/237 [00:04<00:34,  6.17it/s][A
Training:  12%|█▏        | 29/237 [00:05<00:32,  6.32it/s][A
Training:  14%|█▍        | 33/237 [00:05<00:31,  6.42it/s][A
Training:  16%|█▌        | 37/237 [00:06<00:30,  6.47it/s][A
Training:  17%|█▋        | 41/237 [00:07<00:30,  6.45it/s][A
Training:  19%|█▉        | 45/237 [00:07<00:29,  6.46it/s][A
Training:  21%|██        | 49/237 [00:08<00:28,  6.49it/s][A
Training:  22%|██▏       | 53/237 [00:08<00:28,  6.47it/s][A
Training:  24%|██▍       | 57/237 [00:09<00:27,  6.49it/s][A
Training:  26%|██▌ 

Epoch: 17/29 - Loss: 2.7980 - Accuracy: 0.9541



  0%|          | 0/30 [00:00<?, ?it/s][A
  3%|▎         | 1/30 [00:00<00:25,  1.15it/s][A
 17%|█▋        | 5/30 [00:01<00:06,  3.82it/s][A
 30%|███       | 9/30 [00:02<00:04,  4.94it/s][A
 43%|████▎     | 13/30 [00:02<00:03,  5.53it/s][A
 57%|█████▋    | 17/30 [00:03<00:02,  5.89it/s][A
 70%|███████   | 21/30 [00:03<00:01,  6.13it/s][A
 83%|████████▎ | 25/30 [00:04<00:00,  6.26it/s][A
100%|██████████| 30/30 [00:05<00:00,  5.80it/s][A
Epochs:  59%|█████▊    | 17/29 [11:36<08:17, 41.49s/it]

Val Loss: 3.5568 - Val Accuracy: 0.9445



Training:   0%|          | 0/237 [00:00<?, ?it/s][A
Training:   0%|          | 1/237 [00:00<03:24,  1.15it/s][A
Training:   2%|▏         | 5/237 [00:01<00:59,  3.88it/s][A
Training:   4%|▍         | 9/237 [00:02<00:45,  5.03it/s][A
Training:   5%|▌         | 13/237 [00:02<00:39,  5.61it/s][A
Training:   7%|▋         | 17/237 [00:03<00:36,  6.00it/s][A
Training:   9%|▉         | 21/237 [00:03<00:34,  6.22it/s][A
Training:  11%|█         | 25/237 [00:04<00:33,  6.37it/s][A
Training:  12%|█▏        | 29/237 [00:05<00:32,  6.46it/s][A
Training:  14%|█▍        | 33/237 [00:05<00:31,  6.51it/s][A
Training:  16%|█▌        | 37/237 [00:06<00:30,  6.58it/s][A
Training:  17%|█▋        | 41/237 [00:06<00:29,  6.58it/s][A
Training:  19%|█▉        | 45/237 [00:07<00:29,  6.55it/s][A
Training:  21%|██        | 49/237 [00:08<00:28,  6.54it/s][A
Training:  22%|██▏       | 53/237 [00:08<00:28,  6.51it/s][A
Training:  24%|██▍       | 57/237 [00:09<00:27,  6.48it/s][A
Training:  26%|██▌ 

Epoch: 18/29 - Loss: 2.7295 - Accuracy: 0.9522



  0%|          | 0/30 [00:00<?, ?it/s][A
  3%|▎         | 1/30 [00:00<00:25,  1.14it/s][A
 17%|█▋        | 5/30 [00:01<00:06,  3.89it/s][A
 30%|███       | 9/30 [00:02<00:04,  5.04it/s][A
 43%|████▎     | 13/30 [00:02<00:03,  5.60it/s][A
 57%|█████▋    | 17/30 [00:03<00:02,  5.90it/s][A
 70%|███████   | 21/30 [00:03<00:01,  6.09it/s][A
 83%|████████▎ | 25/30 [00:04<00:00,  6.29it/s][A
100%|██████████| 30/30 [00:05<00:00,  5.76it/s][A
Epochs:  62%|██████▏   | 18/29 [12:18<07:38, 41.64s/it]

Val Loss: 3.4013 - Val Accuracy: 0.9385



Training:   0%|          | 0/237 [00:00<?, ?it/s][A
Training:   0%|          | 1/237 [00:00<03:37,  1.08it/s][A
Training:   2%|▏         | 5/237 [00:01<01:01,  3.76it/s][A
Training:   4%|▍         | 9/237 [00:02<00:45,  4.97it/s][A
Training:   5%|▌         | 13/237 [00:02<00:40,  5.58it/s][A
Training:   7%|▋         | 17/237 [00:03<00:37,  5.94it/s][A
Training:   9%|▉         | 21/237 [00:03<00:34,  6.25it/s][A
Training:  11%|█         | 25/237 [00:04<00:33,  6.41it/s][A
Training:  12%|█▏        | 29/237 [00:05<00:32,  6.46it/s][A
Training:  14%|█▍        | 33/237 [00:05<00:31,  6.48it/s][A
Training:  16%|█▌        | 37/237 [00:06<00:30,  6.55it/s][A
Training:  17%|█▋        | 41/237 [00:06<00:29,  6.58it/s][A
Training:  19%|█▉        | 45/237 [00:07<00:29,  6.57it/s][A
Training:  21%|██        | 49/237 [00:08<00:28,  6.60it/s][A
Training:  22%|██▏       | 53/237 [00:08<00:27,  6.63it/s][A
Training:  24%|██▍       | 57/237 [00:09<00:26,  6.68it/s][A
Training:  26%|██▌ 

Epoch: 19/29 - Loss: 2.7152 - Accuracy: 0.9560



  0%|          | 0/30 [00:00<?, ?it/s][A
  3%|▎         | 1/30 [00:00<00:26,  1.10it/s][A
 17%|█▋        | 5/30 [00:01<00:06,  3.68it/s][A
 30%|███       | 9/30 [00:02<00:04,  4.80it/s][A
 43%|████▎     | 13/30 [00:02<00:03,  5.41it/s][A
 57%|█████▋    | 17/30 [00:03<00:02,  5.81it/s][A
 70%|███████   | 21/30 [00:04<00:01,  6.00it/s][A
 83%|████████▎ | 25/30 [00:04<00:00,  6.13it/s][A
100%|██████████| 30/30 [00:05<00:00,  5.61it/s][A
Epochs:  66%|██████▌   | 19/29 [13:00<06:57, 41.72s/it]

Val Loss: 4.1116 - Val Accuracy: 0.9434



Training:   0%|          | 0/237 [00:00<?, ?it/s][A
Training:   0%|          | 1/237 [00:00<03:37,  1.09it/s][A
Training:   2%|▏         | 5/237 [00:01<01:02,  3.73it/s][A
Training:   4%|▍         | 9/237 [00:02<00:47,  4.84it/s][A
Training:   5%|▌         | 13/237 [00:02<00:40,  5.50it/s][A
Training:   7%|▋         | 17/237 [00:03<00:37,  5.84it/s][A
Training:   9%|▉         | 21/237 [00:04<00:35,  6.02it/s][A
Training:  11%|█         | 25/237 [00:04<00:34,  6.10it/s][A
Training:  12%|█▏        | 29/237 [00:05<00:33,  6.23it/s][A
Training:  14%|█▍        | 33/237 [00:05<00:32,  6.30it/s][A
Training:  16%|█▌        | 37/237 [00:06<00:31,  6.32it/s][A
Training:  17%|█▋        | 41/237 [00:07<00:30,  6.40it/s][A
Training:  19%|█▉        | 45/237 [00:07<00:30,  6.35it/s][A
Training:  21%|██        | 49/237 [00:08<00:29,  6.39it/s][A
Training:  22%|██▏       | 53/237 [00:09<00:28,  6.38it/s][A
Training:  24%|██▍       | 57/237 [00:09<00:28,  6.35it/s][A
Training:  26%|██▌ 

Epoch: 20/29 - Loss: 2.7853 - Accuracy: 0.9527



  0%|          | 0/30 [00:00<?, ?it/s][A
  3%|▎         | 1/30 [00:00<00:26,  1.08it/s][A
 17%|█▋        | 5/30 [00:01<00:06,  3.73it/s][A
 30%|███       | 9/30 [00:02<00:04,  4.81it/s][A
 43%|████▎     | 13/30 [00:02<00:03,  5.33it/s][A
 57%|█████▋    | 17/30 [00:03<00:02,  5.72it/s][A
 70%|███████   | 21/30 [00:04<00:01,  5.92it/s][A
 83%|████████▎ | 25/30 [00:04<00:00,  6.08it/s][A
100%|██████████| 30/30 [00:05<00:00,  5.58it/s][A
Epochs:  69%|██████▉   | 20/29 [13:42<06:17, 41.99s/it]

Val Loss: 3.6546 - Val Accuracy: 0.9406



Training:   0%|          | 0/237 [00:00<?, ?it/s][A
Training:   0%|          | 1/237 [00:00<03:39,  1.07it/s][A
Training:   2%|▏         | 5/237 [00:01<01:02,  3.74it/s][A
Training:   4%|▍         | 9/237 [00:02<00:46,  4.89it/s][A
Training:   5%|▌         | 13/237 [00:02<00:40,  5.51it/s][A
Training:   7%|▋         | 17/237 [00:03<00:37,  5.92it/s][A
Training:   9%|▉         | 21/237 [00:03<00:35,  6.12it/s][A
Training:  11%|█         | 25/237 [00:04<00:33,  6.31it/s][A
Training:  12%|█▏        | 29/237 [00:05<00:32,  6.42it/s][A
Training:  14%|█▍        | 33/237 [00:05<00:31,  6.43it/s][A
Training:  16%|█▌        | 37/237 [00:06<00:31,  6.44it/s][A
Training:  17%|█▋        | 41/237 [00:07<00:30,  6.50it/s][A
Training:  19%|█▉        | 45/237 [00:07<00:29,  6.51it/s][A
Training:  20%|██        | 48/237 [00:07<00:23,  8.00it/s][A
Training:  21%|██        | 50/237 [00:08<00:29,  6.39it/s][A
Training:  22%|██▏       | 53/237 [00:08<00:30,  6.06it/s][A
Training:  23%|██▎ 

Epoch: 21/29 - Loss: 2.6762 - Accuracy: 0.9538



  0%|          | 0/30 [00:00<?, ?it/s][A
  3%|▎         | 1/30 [00:00<00:26,  1.10it/s][A
 17%|█▋        | 5/30 [00:01<00:06,  3.83it/s][A
 30%|███       | 9/30 [00:02<00:04,  4.98it/s][A
 43%|████▎     | 13/30 [00:02<00:03,  5.63it/s][A
 57%|█████▋    | 17/30 [00:03<00:02,  5.99it/s][A
 70%|███████   | 21/30 [00:03<00:01,  6.19it/s][A
 83%|████████▎ | 25/30 [00:04<00:00,  6.33it/s][A
100%|██████████| 30/30 [00:05<00:00,  5.81it/s][A
Epochs:  72%|███████▏  | 21/29 [14:24<05:35, 41.94s/it]

Val Loss: 3.6502 - Val Accuracy: 0.9440



Training:   0%|          | 0/237 [00:00<?, ?it/s][A
Training:   0%|          | 1/237 [00:00<03:27,  1.14it/s][A
Training:   2%|▏         | 5/237 [00:01<00:59,  3.90it/s][A
Training:   4%|▍         | 9/237 [00:02<00:44,  5.07it/s][A
Training:   5%|▌         | 13/237 [00:02<00:39,  5.73it/s][A
Training:   7%|▋         | 17/237 [00:03<00:36,  6.09it/s][A
Training:   9%|▉         | 21/237 [00:03<00:34,  6.23it/s][A
Training:  11%|█         | 25/237 [00:04<00:33,  6.41it/s][A
Training:  12%|█▏        | 29/237 [00:05<00:31,  6.53it/s][A
Training:  14%|█▍        | 33/237 [00:05<00:30,  6.59it/s][A
Training:  16%|█▌        | 37/237 [00:06<00:30,  6.62it/s][A
Training:  17%|█▋        | 41/237 [00:06<00:29,  6.61it/s][A
Training:  19%|█▉        | 45/237 [00:07<00:28,  6.66it/s][A
Training:  21%|██        | 49/237 [00:08<00:28,  6.71it/s][A
Training:  22%|██▏       | 53/237 [00:08<00:27,  6.71it/s][A
Training:  24%|██▍       | 57/237 [00:09<00:26,  6.74it/s][A
Training:  26%|██▌ 

Epoch: 22/29 - Loss: 2.6447 - Accuracy: 0.9553



  0%|          | 0/30 [00:00<?, ?it/s][A
  3%|▎         | 1/30 [00:00<00:25,  1.14it/s][A
 17%|█▋        | 5/30 [00:01<00:06,  3.87it/s][A
 30%|███       | 9/30 [00:02<00:04,  5.07it/s][A
 43%|████▎     | 13/30 [00:02<00:03,  5.66it/s][A
 57%|█████▋    | 17/30 [00:03<00:02,  6.01it/s][A
 70%|███████   | 21/30 [00:03<00:01,  6.24it/s][A
 83%|████████▎ | 25/30 [00:04<00:00,  6.40it/s][A
100%|██████████| 30/30 [00:05<00:00,  5.86it/s][A
Epochs:  76%|███████▌  | 22/29 [15:06<04:52, 41.78s/it]

Val Loss: 3.7091 - Val Accuracy: 0.9458



Training:   0%|          | 0/237 [00:00<?, ?it/s][A
Training:   0%|          | 1/237 [00:00<03:34,  1.10it/s][A
Training:   2%|▏         | 5/237 [00:01<01:01,  3.79it/s][A
Training:   4%|▍         | 9/237 [00:02<00:46,  4.91it/s][A
Training:   5%|▌         | 13/237 [00:02<00:40,  5.58it/s][A
Training:   7%|▋         | 17/237 [00:03<00:36,  5.96it/s][A
Training:   9%|▉         | 21/237 [00:03<00:34,  6.21it/s][A
Training:  11%|█         | 25/237 [00:04<00:33,  6.37it/s][A
Training:  12%|█▏        | 29/237 [00:05<00:31,  6.52it/s][A
Training:  14%|█▍        | 33/237 [00:05<00:31,  6.56it/s][A
Training:  16%|█▌        | 37/237 [00:06<00:30,  6.62it/s][A
Training:  17%|█▋        | 41/237 [00:06<00:29,  6.64it/s][A
Training:  19%|█▉        | 45/237 [00:07<00:28,  6.69it/s][A
Training:  21%|██        | 49/237 [00:08<00:27,  6.72it/s][A
Training:  22%|██▏       | 53/237 [00:08<00:27,  6.69it/s][A
Training:  24%|██▍       | 57/237 [00:09<00:26,  6.73it/s][A
Training:  26%|██▌ 

Epoch: 23/29 - Loss: 2.6482 - Accuracy: 0.9572



  0%|          | 0/30 [00:00<?, ?it/s][A
  3%|▎         | 1/30 [00:00<00:24,  1.16it/s][A
 17%|█▋        | 5/30 [00:01<00:06,  3.94it/s][A
 30%|███       | 9/30 [00:02<00:04,  5.15it/s][A
 43%|████▎     | 13/30 [00:02<00:02,  5.75it/s][A
 57%|█████▋    | 17/30 [00:03<00:02,  6.14it/s][A
 70%|███████   | 21/30 [00:03<00:01,  6.33it/s][A
 83%|████████▎ | 25/30 [00:04<00:00,  6.52it/s][A
100%|██████████| 30/30 [00:05<00:00,  5.95it/s][A
Epochs:  79%|███████▉  | 23/29 [15:47<04:09, 41.59s/it]

Val Loss: 4.0151 - Val Accuracy: 0.9445



Training:   0%|          | 0/237 [00:00<?, ?it/s][A
Training:   0%|          | 1/237 [00:00<03:23,  1.16it/s][A
Training:   2%|▏         | 5/237 [00:01<00:58,  3.93it/s][A
Training:   4%|▍         | 9/237 [00:02<00:45,  5.03it/s][A
Training:   5%|▌         | 13/237 [00:02<00:39,  5.64it/s][A
Training:   7%|▋         | 17/237 [00:03<00:36,  5.99it/s][A
Training:   9%|▉         | 21/237 [00:03<00:34,  6.20it/s][A
Training:  11%|█         | 25/237 [00:04<00:33,  6.31it/s][A
Training:  12%|█▏        | 29/237 [00:05<00:32,  6.47it/s][A
Training:  14%|█▍        | 33/237 [00:05<00:31,  6.53it/s][A
Training:  16%|█▌        | 37/237 [00:06<00:30,  6.58it/s][A
Training:  17%|█▋        | 41/237 [00:06<00:29,  6.62it/s][A
Training:  19%|█▉        | 45/237 [00:07<00:28,  6.65it/s][A
Training:  21%|██        | 49/237 [00:08<00:28,  6.69it/s][A
Training:  22%|██▏       | 53/237 [00:08<00:27,  6.66it/s][A
Training:  24%|██▍       | 57/237 [00:09<00:27,  6.65it/s][A
Training:  26%|██▌ 

Epoch: 24/29 - Loss: 2.6473 - Accuracy: 0.9541



  0%|          | 0/30 [00:00<?, ?it/s][A
  3%|▎         | 1/30 [00:00<00:25,  1.16it/s][A
 17%|█▋        | 5/30 [00:01<00:06,  3.94it/s][A
 30%|███       | 9/30 [00:02<00:04,  5.10it/s][A
 43%|████▎     | 13/30 [00:02<00:02,  5.72it/s][A
 57%|█████▋    | 17/30 [00:03<00:02,  6.07it/s][A
 70%|███████   | 21/30 [00:03<00:01,  6.33it/s][A
 83%|████████▎ | 25/30 [00:04<00:00,  6.48it/s][A
100%|██████████| 30/30 [00:05<00:00,  5.96it/s][A
Epochs:  83%|████████▎ | 24/29 [16:28<03:27, 41.42s/it]

Val Loss: 3.6109 - Val Accuracy: 0.9423



Training:   0%|          | 0/237 [00:00<?, ?it/s][A
Training:   0%|          | 1/237 [00:00<03:29,  1.13it/s][A
Training:   2%|▏         | 5/237 [00:01<01:00,  3.81it/s][A
Training:   4%|▍         | 9/237 [00:02<00:45,  4.99it/s][A
Training:   5%|▌         | 13/237 [00:02<00:39,  5.66it/s][A
Training:   7%|▋         | 17/237 [00:03<00:36,  6.02it/s][A
Training:   9%|▉         | 21/237 [00:03<00:34,  6.27it/s][A
Training:  11%|█         | 25/237 [00:04<00:32,  6.45it/s][A
Training:  12%|█▏        | 29/237 [00:05<00:31,  6.60it/s][A
Training:  14%|█▍        | 33/237 [00:05<00:30,  6.61it/s][A
Training:  16%|█▌        | 37/237 [00:06<00:29,  6.71it/s][A
Training:  17%|█▋        | 41/237 [00:06<00:29,  6.73it/s][A
Training:  19%|█▉        | 45/237 [00:07<00:28,  6.73it/s][A
Training:  21%|██        | 49/237 [00:07<00:28,  6.70it/s][A
Training:  22%|██▏       | 53/237 [00:08<00:27,  6.70it/s][A
Training:  24%|██▍       | 57/237 [00:09<00:26,  6.76it/s][A
Training:  26%|██▌ 

Epoch: 25/29 - Loss: 2.6504 - Accuracy: 0.9559



  0%|          | 0/30 [00:00<?, ?it/s][A
  3%|▎         | 1/30 [00:00<00:25,  1.13it/s][A
 17%|█▋        | 5/30 [00:01<00:06,  3.89it/s][A
 30%|███       | 9/30 [00:02<00:04,  5.09it/s][A
 43%|████▎     | 13/30 [00:02<00:02,  5.74it/s][A
 57%|█████▋    | 17/30 [00:03<00:02,  6.13it/s][A
 70%|███████   | 21/30 [00:03<00:01,  6.34it/s][A
 83%|████████▎ | 25/30 [00:04<00:00,  6.52it/s][A
100%|██████████| 30/30 [00:05<00:00,  5.95it/s][A
Epochs:  86%|████████▌ | 25/29 [17:09<02:44, 41.25s/it]

Val Loss: 3.7838 - Val Accuracy: 0.9451



Training:   0%|          | 0/237 [00:00<?, ?it/s][A
Training:   0%|          | 1/237 [00:00<03:34,  1.10it/s][A
Training:   2%|▏         | 5/237 [00:01<01:00,  3.85it/s][A
Training:   4%|▍         | 9/237 [00:02<00:45,  5.01it/s][A
Training:   5%|▌         | 13/237 [00:02<00:39,  5.72it/s][A
Training:   7%|▋         | 17/237 [00:03<00:36,  6.10it/s][A
Training:   9%|▉         | 21/237 [00:03<00:34,  6.32it/s][A
Training:  11%|█         | 25/237 [00:04<00:32,  6.43it/s][A
Training:  12%|█▏        | 29/237 [00:05<00:31,  6.54it/s][A
Training:  14%|█▍        | 33/237 [00:05<00:31,  6.56it/s][A
Training:  16%|█▌        | 37/237 [00:06<00:30,  6.61it/s][A
Training:  17%|█▋        | 41/237 [00:06<00:29,  6.61it/s][A
Training:  19%|█▉        | 45/237 [00:07<00:28,  6.65it/s][A
Training:  21%|██        | 49/237 [00:08<00:28,  6.70it/s][A
Training:  22%|██▏       | 53/237 [00:08<00:27,  6.73it/s][A
Training:  24%|██▍       | 57/237 [00:09<00:26,  6.75it/s][A
Training:  26%|██▌ 

Epoch: 26/29 - Loss: 2.5543 - Accuracy: 0.9563



  0%|          | 0/30 [00:00<?, ?it/s][A
  3%|▎         | 1/30 [00:00<00:25,  1.12it/s][A
 17%|█▋        | 5/30 [00:01<00:06,  3.81it/s][A
 30%|███       | 9/30 [00:02<00:04,  5.05it/s][A
 43%|████▎     | 13/30 [00:02<00:02,  5.71it/s][A
 57%|█████▋    | 17/30 [00:03<00:02,  6.10it/s][A
 70%|███████   | 21/30 [00:03<00:01,  6.33it/s][A
 83%|████████▎ | 25/30 [00:04<00:00,  6.51it/s][A
100%|██████████| 30/30 [00:05<00:00,  5.92it/s][A
Epochs:  90%|████████▉ | 26/29 [17:49<02:03, 41.11s/it]

Val Loss: 3.6154 - Val Accuracy: 0.9389



Training:   0%|          | 0/237 [00:00<?, ?it/s][A
Training:   0%|          | 1/237 [00:00<03:32,  1.11it/s][A
Training:   2%|▏         | 5/237 [00:01<01:00,  3.84it/s][A
Training:   4%|▍         | 9/237 [00:02<00:45,  5.02it/s][A
Training:   5%|▌         | 13/237 [00:02<00:39,  5.65it/s][A
Training:   7%|▋         | 17/237 [00:03<00:36,  6.03it/s][A
Training:   9%|▉         | 21/237 [00:03<00:34,  6.29it/s][A
Training:  11%|█         | 25/237 [00:04<00:33,  6.40it/s][A
Training:  12%|█▏        | 29/237 [00:05<00:31,  6.56it/s][A
Training:  14%|█▍        | 33/237 [00:05<00:30,  6.61it/s][A
Training:  16%|█▌        | 37/237 [00:06<00:30,  6.64it/s][A
Training:  17%|█▋        | 41/237 [00:06<00:29,  6.64it/s][A
Training:  19%|█▉        | 45/237 [00:07<00:28,  6.64it/s][A
Training:  21%|██        | 49/237 [00:08<00:28,  6.61it/s][A
Training:  22%|██▏       | 53/237 [00:08<00:27,  6.70it/s][A
Training:  24%|██▍       | 57/237 [00:09<00:26,  6.72it/s][A
Training:  26%|██▌ 

Epoch: 27/29 - Loss: 2.8503 - Accuracy: 0.9577



  0%|          | 0/30 [00:00<?, ?it/s][A
  3%|▎         | 1/30 [00:00<00:26,  1.10it/s][A
 17%|█▋        | 5/30 [00:01<00:06,  3.84it/s][A
 30%|███       | 9/30 [00:02<00:04,  5.02it/s][A
 43%|████▎     | 13/30 [00:02<00:02,  5.68it/s][A
 57%|█████▋    | 17/30 [00:03<00:02,  6.05it/s][A
 70%|███████   | 21/30 [00:03<00:01,  6.28it/s][A
 83%|████████▎ | 25/30 [00:04<00:00,  6.48it/s][A
100%|██████████| 30/30 [00:05<00:00,  5.90it/s][A
Epochs:  93%|█████████▎| 27/29 [18:31<01:22, 41.15s/it]

Val Loss: 3.4463 - Val Accuracy: 0.9479



Training:   0%|          | 0/237 [00:00<?, ?it/s][A
Training:   0%|          | 1/237 [00:00<03:23,  1.16it/s][A
Training:   2%|▏         | 5/237 [00:01<00:59,  3.92it/s][A
Training:   4%|▍         | 9/237 [00:02<00:44,  5.10it/s][A
Training:   5%|▌         | 13/237 [00:02<00:38,  5.77it/s][A
Training:   7%|▋         | 17/237 [00:03<00:35,  6.14it/s][A
Training:   9%|▉         | 21/237 [00:03<00:34,  6.29it/s][A
Training:  11%|█         | 25/237 [00:04<00:32,  6.46it/s][A
Training:  12%|█▏        | 29/237 [00:04<00:31,  6.57it/s][A
Training:  14%|█▍        | 33/237 [00:05<00:30,  6.60it/s][A
Training:  16%|█▌        | 37/237 [00:06<00:30,  6.63it/s][A
Training:  17%|█▋        | 41/237 [00:06<00:29,  6.69it/s][A
Training:  19%|█▉        | 45/237 [00:07<00:28,  6.72it/s][A
Training:  21%|██        | 49/237 [00:07<00:27,  6.77it/s][A
Training:  22%|██▏       | 53/237 [00:08<00:27,  6.72it/s][A
Training:  24%|██▍       | 57/237 [00:09<00:26,  6.78it/s][A
Training:  26%|██▌ 

Epoch: 28/29 - Loss: 2.6758 - Accuracy: 0.9567



  0%|          | 0/30 [00:00<?, ?it/s][A
  3%|▎         | 1/30 [00:00<00:25,  1.13it/s][A
 17%|█▋        | 5/30 [00:01<00:06,  3.82it/s][A
 30%|███       | 9/30 [00:02<00:04,  5.01it/s][A
 43%|████▎     | 13/30 [00:02<00:03,  5.65it/s][A
 57%|█████▋    | 17/30 [00:03<00:02,  6.07it/s][A
 70%|███████   | 21/30 [00:03<00:01,  6.29it/s][A
 83%|████████▎ | 25/30 [00:04<00:00,  6.48it/s][A
100%|██████████| 30/30 [00:05<00:00,  5.90it/s][A
Epochs:  97%|█████████▋| 28/29 [19:12<00:41, 41.07s/it]

Val Loss: 3.4847 - Val Accuracy: 0.9377



Training:   0%|          | 0/237 [00:00<?, ?it/s][A
Training:   0%|          | 1/237 [00:00<03:31,  1.12it/s][A
Training:   2%|▏         | 5/237 [00:01<00:59,  3.89it/s][A
Training:   4%|▍         | 9/237 [00:02<00:45,  5.05it/s][A
Training:   5%|▌         | 13/237 [00:02<00:39,  5.64it/s][A
Training:   7%|▋         | 17/237 [00:03<00:36,  5.98it/s][A
Training:   9%|▉         | 21/237 [00:03<00:35,  6.16it/s][A
Training:  11%|█         | 25/237 [00:04<00:33,  6.25it/s][A
Training:  12%|█▏        | 29/237 [00:05<00:32,  6.38it/s][A
Training:  14%|█▍        | 33/237 [00:05<00:31,  6.46it/s][A
Training:  16%|█▌        | 37/237 [00:06<00:30,  6.56it/s][A
Training:  17%|█▋        | 41/237 [00:06<00:29,  6.62it/s][A
Training:  19%|█▉        | 45/237 [00:07<00:28,  6.69it/s][A
Training:  21%|██        | 49/237 [00:08<00:27,  6.78it/s][A
Training:  22%|██▏       | 53/237 [00:08<00:26,  6.82it/s][A
Training:  24%|██▍       | 57/237 [00:09<00:26,  6.74it/s][A
Training:  26%|██▌ 

Epoch: 29/29 - Loss: 2.6874 - Accuracy: 0.9571



  0%|          | 0/30 [00:00<?, ?it/s][A
  3%|▎         | 1/30 [00:00<00:24,  1.17it/s][A
 17%|█▋        | 5/30 [00:01<00:06,  3.89it/s][A
 30%|███       | 9/30 [00:02<00:04,  5.08it/s][A
 43%|████▎     | 13/30 [00:02<00:02,  5.74it/s][A
 57%|█████▋    | 17/30 [00:03<00:02,  6.16it/s][A
 70%|███████   | 21/30 [00:03<00:01,  6.38it/s][A
 83%|████████▎ | 25/30 [00:04<00:00,  6.56it/s][A
100%|██████████| 30/30 [00:04<00:00,  6.01it/s][A
Epochs: 100%|██████████| 29/29 [19:52<00:00, 41.14s/it]
[32m[I 2023-12-07 10:03:40,533][0m Trial 16 finished with value: 0.9450814723968506 and parameters: {'learning_rate': 0.00032038885686179594, 'weight_decay': 0.0012417026259634344, 'epsilon': 3.2008346584419735e-08, 'batch_size': 59, 'epochs': 29}. Best is trial 16 with value: 0.9450814723968506.[0m


Val Loss: 3.2812 - Val Accuracy: 0.9451
Saving best model...
Learning rate: 1.385738101190497e-05
Weight decay: 0.001443927339878984
Epsilon: 3.5875023325100516e-08
Batch size: 99
Number of epochs: 30


Epochs:   0%|          | 0/30 [00:00<?, ?it/s]
Training:   0%|          | 0/142 [00:00<?, ?it/s][A
Training:   1%|          | 1/142 [00:01<03:04,  1.31s/it][A
Training:   4%|▎         | 5/142 [00:02<00:55,  2.47it/s][A
Training:   6%|▋         | 9/142 [00:03<00:42,  3.14it/s][A
Training:   9%|▉         | 13/142 [00:04<00:36,  3.50it/s][A
Training:  12%|█▏        | 17/142 [00:05<00:33,  3.68it/s][A
Training:  15%|█▍        | 21/142 [00:06<00:31,  3.80it/s][A
Training:  18%|█▊        | 25/142 [00:07<00:30,  3.88it/s][A
Training:  20%|██        | 29/142 [00:08<00:28,  3.93it/s][A
Training:  23%|██▎       | 33/142 [00:09<00:27,  3.96it/s][A
Training:  26%|██▌       | 37/142 [00:10<00:26,  3.99it/s][A
Training:  29%|██▉       | 41/142 [00:11<00:25,  4.02it/s][A
Training:  32%|███▏      | 45/142 [00:12<00:23,  4.06it/s][A
Training:  34%|███▍      | 48/142 [00:12<00:18,  5.13it/s][A
Training:  35%|███▌      | 50/142 [00:13<00:22,  4.00it/s][A
Training:  37%|███▋      | 53/142 [

Epoch: 1/30 - Loss: 14.3848 - Accuracy: 0.5924



  0%|          | 0/18 [00:00<?, ?it/s][A
  6%|▌         | 1/18 [00:01<00:22,  1.31s/it][A
 28%|██▊       | 5/18 [00:02<00:05,  2.49it/s][A
 50%|█████     | 9/18 [00:03<00:02,  3.19it/s][A
 72%|███████▏  | 13/18 [00:04<00:01,  3.54it/s][A
100%|██████████| 18/18 [00:05<00:00,  3.43it/s][A
Epochs:   0%|          | 0/30 [00:41<?, ?it/s]
[32m[I 2023-12-07 10:04:22,153][0m Trial 17 pruned. [0m


Val Loss: 13.2923 - Val Accuracy: 0.6405
Learning rate: 6.793226034951654e-05
Weight decay: 0.003024879464998162
Epsilon: 2.8023989865494093e-08
Batch size: 62
Number of epochs: 31


Epochs:   0%|          | 0/31 [00:00<?, ?it/s]
Training:   0%|          | 0/226 [00:00<?, ?it/s][A
Training:   0%|          | 1/226 [00:00<03:31,  1.07it/s][A
Training:   2%|▏         | 5/226 [00:01<00:59,  3.71it/s][A
Training:   4%|▍         | 9/226 [00:02<00:44,  4.89it/s][A
Training:   6%|▌         | 13/226 [00:02<00:39,  5.44it/s][A
Training:   8%|▊         | 17/226 [00:03<00:36,  5.68it/s][A
Training:   9%|▉         | 21/226 [00:04<00:34,  5.86it/s][A
Training:  11%|█         | 25/226 [00:04<00:33,  5.98it/s][A
Training:  13%|█▎        | 29/226 [00:05<00:32,  6.10it/s][A
Training:  15%|█▍        | 33/226 [00:05<00:31,  6.14it/s][A
Training:  16%|█▋        | 37/226 [00:06<00:30,  6.17it/s][A
Training:  18%|█▊        | 41/226 [00:07<00:29,  6.28it/s][A
Training:  20%|█▉        | 45/226 [00:07<00:28,  6.27it/s][A
Training:  22%|██▏       | 49/226 [00:08<00:27,  6.34it/s][A
Training:  23%|██▎       | 53/226 [00:09<00:26,  6.43it/s][A
Training:  25%|██▌       | 57/226 [

Epoch: 1/31 - Loss: 11.0121 - Accuracy: 0.7629



  0%|          | 0/29 [00:00<?, ?it/s][A
  3%|▎         | 1/29 [00:00<00:25,  1.09it/s][A
 17%|█▋        | 5/29 [00:01<00:06,  3.78it/s][A
 31%|███       | 9/29 [00:02<00:04,  4.86it/s][A
 45%|████▍     | 13/29 [00:02<00:02,  5.45it/s][A
 59%|█████▊    | 17/29 [00:03<00:02,  5.83it/s][A
 72%|███████▏  | 21/29 [00:03<00:01,  6.07it/s][A
100%|██████████| 29/29 [00:04<00:00,  6.09it/s][A
Epochs:   0%|          | 0/31 [00:40<?, ?it/s]
[32m[I 2023-12-07 10:05:03,284][0m Trial 18 pruned. [0m


Val Loss: 7.9725 - Val Accuracy: 0.8883
Learning rate: 0.00728786058983412
Weight decay: 0.0008196611868150341
Epsilon: 3.030236894033943e-08
Batch size: 10
Number of epochs: 60


Epochs:   0%|          | 0/60 [00:00<?, ?it/s]
Training:   0%|          | 0/1398 [00:00<?, ?it/s][A
Training:   0%|          | 1/1398 [00:00<07:48,  2.98it/s][A
Training:   0%|          | 5/1398 [00:00<01:42, 13.60it/s][A
Training:   1%|          | 9/1398 [00:00<01:05, 21.09it/s][A
Training:   1%|          | 13/1398 [00:00<00:52, 26.38it/s][A
Training:   1%|          | 17/1398 [00:00<00:46, 29.91it/s][A
Training:   2%|▏         | 21/1398 [00:00<00:42, 32.06it/s][A
Training:   2%|▏         | 26/1398 [00:00<00:38, 36.03it/s][A
Training:   2%|▏         | 30/1398 [00:01<00:36, 37.05it/s][A
Training:   2%|▏         | 34/1398 [00:01<00:36, 37.34it/s][A
Training:   3%|▎         | 38/1398 [00:01<00:36, 37.32it/s][A
Training:   3%|▎         | 42/1398 [00:01<00:36, 37.56it/s][A
Training:   3%|▎         | 46/1398 [00:01<00:36, 37.37it/s][A
Training:   4%|▎         | 50/1398 [00:01<00:36, 37.11it/s][A
Training:   4%|▍         | 54/1398 [00:01<00:35, 37.45it/s][A
Training:   4%|▍    

Training:  79%|███████▉  | 1101/1398 [00:28<00:06, 42.49it/s][A
Training:  79%|███████▉  | 1106/1398 [00:28<00:08, 36.44it/s][A
Training:  79%|███████▉  | 1110/1398 [00:29<00:07, 36.52it/s][A
Training:  80%|███████▉  | 1114/1398 [00:29<00:07, 36.99it/s][A
Training:  80%|████████  | 1119/1398 [00:29<00:06, 40.33it/s][A
Training:  80%|████████  | 1124/1398 [00:29<00:06, 42.37it/s][A
Training:  81%|████████  | 1129/1398 [00:29<00:06, 42.82it/s][A
Training:  81%|████████  | 1134/1398 [00:29<00:07, 36.66it/s][A
Training:  81%|████████▏ | 1138/1398 [00:29<00:06, 37.45it/s][A
Training:  82%|████████▏ | 1143/1398 [00:29<00:06, 40.12it/s][A
Training:  82%|████████▏ | 1148/1398 [00:29<00:05, 42.02it/s][A
Training:  82%|████████▏ | 1153/1398 [00:30<00:05, 43.56it/s][A
Training:  83%|████████▎ | 1158/1398 [00:30<00:06, 36.05it/s][A
Training:  83%|████████▎ | 1162/1398 [00:30<00:06, 36.64it/s][A
Training:  83%|████████▎ | 1166/1398 [00:30<00:06, 37.34it/s][A
Training:  84%|████████▎ 

Epoch: 1/60 - Loss: 2305.4128 - Accuracy: 0.8644



  0%|          | 0/175 [00:00<?, ?it/s][A
  1%|          | 1/175 [00:00<00:56,  3.07it/s][A
  3%|▎         | 5/175 [00:00<00:11, 14.23it/s][A
  5%|▌         | 9/175 [00:00<00:07, 21.75it/s][A
  7%|▋         | 13/175 [00:00<00:06, 26.96it/s][A
 10%|█         | 18/175 [00:00<00:04, 33.53it/s][A
 13%|█▎        | 22/175 [00:00<00:04, 34.78it/s][A
 15%|█▌        | 27/175 [00:00<00:03, 38.05it/s][A
 18%|█▊        | 32/175 [00:01<00:03, 40.28it/s][A
 21%|██        | 37/175 [00:01<00:03, 34.91it/s][A
 23%|██▎       | 41/175 [00:01<00:03, 36.03it/s][A
 26%|██▌       | 45/175 [00:01<00:03, 36.91it/s][A
 28%|██▊       | 49/175 [00:01<00:03, 36.99it/s][A
 31%|███▏      | 55/175 [00:01<00:02, 42.24it/s][A
 34%|███▍      | 60/175 [00:01<00:02, 43.24it/s][A
 37%|███▋      | 65/175 [00:01<00:03, 36.10it/s][A
 40%|████      | 70/175 [00:02<00:02, 39.44it/s][A
 43%|████▎     | 75/175 [00:02<00:02, 40.80it/s][A
 46%|████▌     | 80/175 [00:02<00:02, 42.33it/s][A
 49%|████▊     | 85/175

Val Loss: 3280.0183 - Val Accuracy: 0.8920
Learning rate: 0.0005624769068457545
Weight decay: 0.0021223853891047125
Epsilon: 1.7002291132922094e-08
Batch size: 83
Number of epochs: 44


Epochs:   0%|          | 0/44 [00:00<?, ?it/s]
Training:   0%|          | 0/169 [00:00<?, ?it/s][A
Training:   1%|          | 1/169 [00:01<03:14,  1.16s/it][A
Training:   3%|▎         | 5/169 [00:01<00:57,  2.87it/s][A
Training:   5%|▌         | 9/169 [00:02<00:43,  3.69it/s][A
Training:   8%|▊         | 13/169 [00:03<00:37,  4.12it/s][A
Training:  10%|█         | 17/169 [00:04<00:34,  4.37it/s][A
Training:  12%|█▏        | 21/169 [00:05<00:32,  4.51it/s][A
Training:  15%|█▍        | 25/169 [00:06<00:31,  4.63it/s][A
Training:  17%|█▋        | 29/169 [00:06<00:29,  4.70it/s][A
Training:  20%|█▉        | 33/169 [00:07<00:28,  4.75it/s][A
Training:  22%|██▏       | 37/169 [00:08<00:27,  4.77it/s][A
Training:  24%|██▍       | 41/169 [00:09<00:26,  4.78it/s][A
Training:  27%|██▋       | 45/169 [00:10<00:25,  4.79it/s][A
Training:  29%|██▉       | 49/169 [00:11<00:25,  4.79it/s][A
Training:  31%|███▏      | 53/169 [00:11<00:24,  4.81it/s][A
Training:  34%|███▎      | 57/169 [

Epoch: 1/44 - Loss: 7.8395 - Accuracy: 0.8876



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:23,  1.13s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.91it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.74it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.17it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.42it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.14it/s][A
Epochs:   2%|▏         | 1/44 [00:40<29:07, 40.65s/it]

Val Loss: 7.1902 - Val Accuracy: 0.9337



Training:   0%|          | 0/169 [00:00<?, ?it/s][A
Training:   1%|          | 1/169 [00:01<03:08,  1.12s/it][A
Training:   3%|▎         | 5/169 [00:01<00:56,  2.92it/s][A
Training:   5%|▌         | 9/169 [00:02<00:42,  3.74it/s][A
Training:   8%|▊         | 13/169 [00:03<00:37,  4.17it/s][A
Training:  10%|█         | 17/169 [00:04<00:34,  4.44it/s][A
Training:  12%|█▏        | 21/169 [00:05<00:32,  4.56it/s][A
Training:  15%|█▍        | 25/169 [00:06<00:30,  4.67it/s][A
Training:  17%|█▋        | 29/169 [00:06<00:29,  4.72it/s][A
Training:  20%|█▉        | 33/169 [00:07<00:28,  4.75it/s][A
Training:  22%|██▏       | 37/169 [00:08<00:27,  4.77it/s][A
Training:  24%|██▍       | 41/169 [00:09<00:26,  4.81it/s][A
Training:  27%|██▋       | 45/169 [00:10<00:26,  4.75it/s][A
Training:  29%|██▉       | 49/169 [00:11<00:25,  4.79it/s][A
Training:  31%|███▏      | 53/169 [00:11<00:24,  4.82it/s][A
Training:  34%|███▎      | 57/169 [00:12<00:23,  4.81it/s][A
Training:  36%|███▌

Epoch: 2/44 - Loss: 5.2786 - Accuracy: 0.9367



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:23,  1.12s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.89it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.75it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.16it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.43it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.15it/s][A
Epochs:   5%|▍         | 2/44 [01:21<28:28, 40.67s/it]

Val Loss: 5.5710 - Val Accuracy: 0.9392



Training:   0%|          | 0/169 [00:00<?, ?it/s][A
Training:   1%|          | 1/169 [00:01<03:09,  1.13s/it][A
Training:   3%|▎         | 5/169 [00:01<00:56,  2.89it/s][A
Training:   5%|▌         | 9/169 [00:02<00:42,  3.74it/s][A
Training:   8%|▊         | 13/169 [00:03<00:37,  4.21it/s][A
Training:  10%|█         | 17/169 [00:04<00:34,  4.42it/s][A
Training:  12%|█▏        | 21/169 [00:05<00:32,  4.57it/s][A
Training:  15%|█▍        | 25/169 [00:06<00:30,  4.67it/s][A
Training:  17%|█▋        | 29/169 [00:06<00:29,  4.73it/s][A
Training:  20%|█▉        | 33/169 [00:07<00:28,  4.77it/s][A
Training:  22%|██▏       | 37/169 [00:08<00:27,  4.81it/s][A
Training:  24%|██▍       | 41/169 [00:09<00:26,  4.84it/s][A
Training:  27%|██▋       | 45/169 [00:10<00:25,  4.89it/s][A
Training:  29%|██▉       | 49/169 [00:10<00:24,  4.84it/s][A
Training:  31%|███▏      | 53/169 [00:11<00:23,  4.84it/s][A
Training:  34%|███▎      | 57/169 [00:12<00:23,  4.82it/s][A
Training:  36%|███▌

Epoch: 3/44 - Loss: 4.8836 - Accuracy: 0.9441



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:23,  1.13s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.86it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.71it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.15it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.38it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.12it/s][A
Epochs:   7%|▋         | 3/44 [02:02<27:50, 40.75s/it]

Val Loss: 5.5337 - Val Accuracy: 0.9315



Training:   0%|          | 0/169 [00:00<?, ?it/s][A
Training:   1%|          | 1/169 [00:01<03:17,  1.18s/it][A
Training:   3%|▎         | 5/169 [00:02<00:58,  2.82it/s][A
Training:   5%|▌         | 9/169 [00:02<00:43,  3.66it/s][A
Training:   8%|▊         | 13/169 [00:03<00:38,  4.09it/s][A
Training:  10%|█         | 17/169 [00:04<00:34,  4.36it/s][A
Training:  12%|█▏        | 21/169 [00:05<00:32,  4.53it/s][A
Training:  15%|█▍        | 25/169 [00:06<00:31,  4.62it/s][A
Training:  17%|█▋        | 29/169 [00:06<00:30,  4.65it/s][A
Training:  20%|█▉        | 33/169 [00:07<00:29,  4.69it/s][A
Training:  22%|██▏       | 37/169 [00:08<00:28,  4.71it/s][A
Training:  24%|██▍       | 41/169 [00:09<00:26,  4.79it/s][A
Training:  27%|██▋       | 45/169 [00:10<00:25,  4.83it/s][A
Training:  29%|██▉       | 49/169 [00:11<00:24,  4.82it/s][A
Training:  31%|███▏      | 53/169 [00:11<00:23,  4.85it/s][A
Training:  34%|███▎      | 57/169 [00:12<00:23,  4.87it/s][A
Training:  36%|███▌

Epoch: 4/44 - Loss: 4.8486 - Accuracy: 0.9453



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:23,  1.13s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.89it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.72it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.10it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.40it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.13it/s][A
Epochs:   9%|▉         | 4/44 [02:42<27:11, 40.78s/it]

Val Loss: 4.8674 - Val Accuracy: 0.9403



Training:   0%|          | 0/169 [00:00<?, ?it/s][A
Training:   1%|          | 1/169 [00:01<03:15,  1.16s/it][A
Training:   3%|▎         | 5/169 [00:01<00:57,  2.86it/s][A
Training:   5%|▌         | 9/169 [00:02<00:43,  3.68it/s][A
Training:   8%|▊         | 13/169 [00:03<00:38,  4.10it/s][A
Training:  10%|█         | 17/169 [00:04<00:34,  4.37it/s][A
Training:  12%|█▏        | 21/169 [00:05<00:32,  4.53it/s][A
Training:  15%|█▍        | 25/169 [00:06<00:31,  4.59it/s][A
Training:  17%|█▋        | 29/169 [00:06<00:29,  4.68it/s][A
Training:  20%|█▉        | 33/169 [00:07<00:28,  4.72it/s][A
Training:  22%|██▏       | 37/169 [00:08<00:27,  4.77it/s][A
Training:  24%|██▍       | 41/169 [00:09<00:26,  4.83it/s][A
Training:  27%|██▋       | 45/169 [00:10<00:25,  4.86it/s][A
Training:  29%|██▉       | 49/169 [00:11<00:24,  4.83it/s][A
Training:  31%|███▏      | 53/169 [00:11<00:24,  4.82it/s][A
Training:  34%|███▎      | 57/169 [00:12<00:23,  4.83it/s][A
Training:  36%|███▌

Epoch: 5/44 - Loss: 4.5016 - Accuracy: 0.9470



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:23,  1.13s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.88it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.71it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.14it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.40it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.14it/s][A
Epochs:  11%|█▏        | 5/44 [03:23<26:30, 40.79s/it]

Val Loss: 5.3899 - Val Accuracy: 0.9419



Training:   0%|          | 0/169 [00:00<?, ?it/s][A
Training:   1%|          | 1/169 [00:01<03:07,  1.12s/it][A
Training:   3%|▎         | 5/169 [00:01<00:56,  2.92it/s][A
Training:   5%|▌         | 9/169 [00:02<00:42,  3.75it/s][A
Training:   8%|▊         | 13/169 [00:03<00:37,  4.16it/s][A
Training:  10%|█         | 17/169 [00:04<00:34,  4.40it/s][A
Training:  12%|█▏        | 21/169 [00:05<00:32,  4.55it/s][A
Training:  15%|█▍        | 25/169 [00:06<00:31,  4.61it/s][A
Training:  17%|█▋        | 29/169 [00:06<00:29,  4.67it/s][A
Training:  20%|█▉        | 33/169 [00:07<00:28,  4.71it/s][A
Training:  22%|██▏       | 37/169 [00:08<00:27,  4.76it/s][A
Training:  24%|██▍       | 41/169 [00:09<00:26,  4.79it/s][A
Training:  27%|██▋       | 45/169 [00:10<00:25,  4.81it/s][A
Training:  29%|██▉       | 49/169 [00:11<00:24,  4.84it/s][A
Training:  31%|███▏      | 53/169 [00:11<00:24,  4.82it/s][A
Training:  34%|███▎      | 57/169 [00:12<00:23,  4.83it/s][A
Training:  36%|███▌

Epoch: 6/44 - Loss: 4.5782 - Accuracy: 0.9483



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:23,  1.14s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.92it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.76it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.17it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.39it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.14it/s][A
Epochs:  14%|█▎        | 6/44 [04:04<25:49, 40.78s/it]

Val Loss: 5.1409 - Val Accuracy: 0.9387



Training:   0%|          | 0/169 [00:00<?, ?it/s][A
Training:   1%|          | 1/169 [00:01<03:20,  1.20s/it][A
Training:   3%|▎         | 5/169 [00:02<01:03,  2.56it/s][A
Training:   5%|▌         | 9/169 [00:03<00:50,  3.18it/s][A
Training:   8%|▊         | 13/169 [00:04<00:43,  3.57it/s][A
Training:  10%|█         | 17/169 [00:05<00:40,  3.73it/s][A
Training:  11%|█         | 19/169 [00:05<00:33,  4.52it/s][A
Training:  12%|█▏        | 21/169 [00:06<00:40,  3.64it/s][A
Training:  14%|█▎        | 23/169 [00:06<00:31,  4.57it/s][A
Training:  15%|█▍        | 25/169 [00:07<00:38,  3.77it/s][A
Training:  16%|█▌        | 27/169 [00:07<00:29,  4.82it/s][A
Training:  17%|█▋        | 29/169 [00:07<00:36,  3.85it/s][A
Training:  18%|█▊        | 31/169 [00:08<00:27,  4.97it/s][A
Training:  20%|█▉        | 33/169 [00:08<00:36,  3.77it/s][A
Training:  21%|██▏       | 36/169 [00:09<00:23,  5.61it/s][A
Training:  22%|██▏       | 38/169 [00:09<00:32,  4.00it/s][A
Training:  24%|██▍ 

Epoch: 7/44 - Loss: 4.5487 - Accuracy: 0.9478



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:24,  1.16s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.89it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.75it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.18it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.47it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.15it/s][A
Epochs:  16%|█▌        | 7/44 [04:50<26:16, 42.60s/it]

Val Loss: 5.0881 - Val Accuracy: 0.9463



Training:   0%|          | 0/169 [00:00<?, ?it/s][A
Training:   1%|          | 1/169 [00:01<03:13,  1.15s/it][A
Training:   3%|▎         | 5/169 [00:01<00:57,  2.85it/s][A
Training:   5%|▌         | 9/169 [00:02<00:43,  3.67it/s][A
Training:   8%|▊         | 13/169 [00:03<00:37,  4.11it/s][A
Training:  10%|█         | 17/169 [00:04<00:35,  4.33it/s][A
Training:  12%|█▏        | 21/169 [00:05<00:32,  4.49it/s][A
Training:  15%|█▍        | 25/169 [00:06<00:31,  4.56it/s][A
Training:  17%|█▋        | 28/169 [00:06<00:24,  5.86it/s][A
Training:  18%|█▊        | 30/169 [00:07<00:30,  4.61it/s][A
Training:  20%|█▉        | 33/169 [00:07<00:31,  4.29it/s][A
Training:  22%|██▏       | 37/169 [00:08<00:29,  4.44it/s][A
Training:  24%|██▍       | 41/169 [00:09<00:27,  4.58it/s][A
Training:  27%|██▋       | 45/169 [00:10<00:26,  4.66it/s][A
Training:  29%|██▉       | 49/169 [00:11<00:25,  4.66it/s][A
Training:  31%|███▏      | 53/169 [00:12<00:24,  4.71it/s][A
Training:  34%|███▎

Epoch: 8/44 - Loss: 4.3753 - Accuracy: 0.9504



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:23,  1.13s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.89it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.72it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.15it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.47it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.15it/s][A
Epochs:  18%|█▊        | 8/44 [05:32<25:20, 42.24s/it]

Val Loss: 5.1965 - Val Accuracy: 0.9452



Training:   0%|          | 0/169 [00:00<?, ?it/s][A
Training:   1%|          | 1/169 [00:01<03:14,  1.16s/it][A
Training:   3%|▎         | 5/169 [00:02<00:57,  2.83it/s][A
Training:   5%|▌         | 9/169 [00:02<00:44,  3.57it/s][A
Training:   8%|▊         | 13/169 [00:03<00:39,  3.98it/s][A
Training:  10%|█         | 17/169 [00:04<00:35,  4.25it/s][A
Training:  12%|█▏        | 21/169 [00:05<00:33,  4.42it/s][A
Training:  15%|█▍        | 25/169 [00:06<00:31,  4.55it/s][A
Training:  17%|█▋        | 29/169 [00:07<00:30,  4.60it/s][A
Training:  20%|█▉        | 33/169 [00:07<00:29,  4.63it/s][A
Training:  22%|██▏       | 37/169 [00:08<00:28,  4.64it/s][A
Training:  24%|██▍       | 41/169 [00:09<00:27,  4.69it/s][A
Training:  27%|██▋       | 45/169 [00:10<00:26,  4.71it/s][A
Training:  29%|██▉       | 49/169 [00:11<00:25,  4.76it/s][A
Training:  31%|███▏      | 53/169 [00:12<00:24,  4.76it/s][A
Training:  34%|███▎      | 57/169 [00:12<00:23,  4.78it/s][A
Training:  36%|███▌

Epoch: 9/44 - Loss: 4.6098 - Accuracy: 0.9509



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:23,  1.14s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.88it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.70it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.12it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.40it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.11it/s][A
Epochs:  20%|██        | 9/44 [06:13<24:27, 41.92s/it]

Val Loss: 5.3921 - Val Accuracy: 0.9419



Training:   0%|          | 0/169 [00:00<?, ?it/s][A
Training:   1%|          | 1/169 [00:01<03:12,  1.15s/it][A
Training:   3%|▎         | 5/169 [00:01<00:57,  2.86it/s][A
Training:   5%|▌         | 9/169 [00:02<00:43,  3.68it/s][A
Training:   8%|▊         | 13/169 [00:03<00:37,  4.12it/s][A
Training:  10%|█         | 17/169 [00:04<00:34,  4.36it/s][A
Training:  12%|█▏        | 21/169 [00:05<00:33,  4.44it/s][A
Training:  15%|█▍        | 25/169 [00:06<00:31,  4.53it/s][A
Training:  17%|█▋        | 29/169 [00:07<00:30,  4.60it/s][A
Training:  20%|█▉        | 33/169 [00:07<00:29,  4.65it/s][A
Training:  22%|██▏       | 37/169 [00:08<00:28,  4.71it/s][A
Training:  24%|██▍       | 41/169 [00:09<00:26,  4.78it/s][A
Training:  27%|██▋       | 45/169 [00:10<00:26,  4.75it/s][A
Training:  29%|██▉       | 49/169 [00:11<00:25,  4.75it/s][A
Training:  31%|███▏      | 53/169 [00:12<00:24,  4.75it/s][A
Training:  34%|███▎      | 57/169 [00:12<00:23,  4.76it/s][A
Training:  36%|███▌

Epoch: 10/44 - Loss: 4.3994 - Accuracy: 0.9503



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:24,  1.17s/it][A
 23%|██▎       | 5/22 [00:02<00:05,  2.84it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.64it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.09it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.33it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.04it/s][A
Epochs:  23%|██▎       | 10/44 [06:54<23:39, 41.76s/it]

Val Loss: 5.2345 - Val Accuracy: 0.9387



Training:   0%|          | 0/169 [00:00<?, ?it/s][A
Training:   1%|          | 1/169 [00:01<03:21,  1.20s/it][A
Training:   3%|▎         | 5/169 [00:02<00:59,  2.77it/s][A
Training:   5%|▌         | 9/169 [00:02<00:44,  3.61it/s][A
Training:   8%|▊         | 13/169 [00:03<00:38,  4.02it/s][A
Training:  10%|█         | 17/169 [00:04<00:35,  4.28it/s][A
Training:  12%|█▏        | 21/169 [00:05<00:33,  4.43it/s][A
Training:  15%|█▍        | 25/169 [00:06<00:31,  4.54it/s][A
Training:  17%|█▋        | 29/169 [00:07<00:30,  4.58it/s][A
Training:  20%|█▉        | 33/169 [00:07<00:29,  4.64it/s][A
Training:  22%|██▏       | 37/169 [00:08<00:28,  4.71it/s][A
Training:  24%|██▍       | 41/169 [00:09<00:26,  4.75it/s][A
Training:  27%|██▋       | 45/169 [00:10<00:25,  4.79it/s][A
Training:  29%|██▉       | 49/169 [00:11<00:25,  4.79it/s][A
Training:  31%|███▏      | 53/169 [00:12<00:24,  4.77it/s][A
Training:  34%|███▎      | 57/169 [00:12<00:23,  4.77it/s][A
Training:  36%|███▌

Epoch: 11/44 - Loss: 4.6433 - Accuracy: 0.9522



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:23,  1.13s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.88it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.65it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.10it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.34it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.06it/s][A
Epochs:  25%|██▌       | 11/44 [07:36<22:53, 41.61s/it]

Val Loss: 5.5847 - Val Accuracy: 0.9452



Training:   0%|          | 0/169 [00:00<?, ?it/s][A
Training:   1%|          | 1/169 [00:01<03:11,  1.14s/it][A
Training:   3%|▎         | 5/169 [00:01<00:57,  2.86it/s][A
Training:   5%|▌         | 9/169 [00:02<00:43,  3.68it/s][A
Training:   8%|▊         | 13/169 [00:03<00:38,  4.10it/s][A
Training:  10%|█         | 17/169 [00:04<00:34,  4.34it/s][A
Training:  12%|█▏        | 21/169 [00:05<00:32,  4.54it/s][A
Training:  15%|█▍        | 25/169 [00:06<00:31,  4.62it/s][A
Training:  17%|█▋        | 29/169 [00:06<00:29,  4.69it/s][A
Training:  20%|█▉        | 33/169 [00:07<00:28,  4.70it/s][A
Training:  22%|██▏       | 37/169 [00:08<00:27,  4.72it/s][A
Training:  24%|██▍       | 41/169 [00:09<00:26,  4.76it/s][A
Training:  27%|██▋       | 45/169 [00:10<00:25,  4.81it/s][A
Training:  29%|██▉       | 49/169 [00:11<00:25,  4.80it/s][A
Training:  31%|███▏      | 53/169 [00:11<00:24,  4.77it/s][A
Training:  34%|███▎      | 57/169 [00:12<00:23,  4.78it/s][A
Training:  36%|███▌

Epoch: 12/44 - Loss: 4.3722 - Accuracy: 0.9510



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:23,  1.11s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.90it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.74it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.20it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.46it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.15it/s][A
Epochs:  27%|██▋       | 12/44 [08:17<22:04, 41.38s/it]

Val Loss: 5.5051 - Val Accuracy: 0.9441



Training:   0%|          | 0/169 [00:00<?, ?it/s][A
Training:   1%|          | 1/169 [00:01<03:07,  1.11s/it][A
Training:   3%|▎         | 5/169 [00:01<00:56,  2.89it/s][A
Training:   5%|▌         | 9/169 [00:02<00:43,  3.70it/s][A
Training:   8%|▊         | 13/169 [00:03<00:37,  4.13it/s][A
Training:  10%|█         | 17/169 [00:04<00:35,  4.34it/s][A
Training:  12%|█▏        | 21/169 [00:05<00:32,  4.51it/s][A
Training:  15%|█▍        | 25/169 [00:06<00:31,  4.62it/s][A
Training:  17%|█▋        | 29/169 [00:06<00:29,  4.70it/s][A
Training:  20%|█▉        | 33/169 [00:07<00:28,  4.78it/s][A
Training:  22%|██▏       | 37/169 [00:08<00:27,  4.82it/s][A
Training:  24%|██▍       | 41/169 [00:09<00:26,  4.79it/s][A
Training:  26%|██▌       | 44/169 [00:09<00:20,  6.05it/s][A
Training:  27%|██▋       | 46/169 [00:10<00:25,  4.85it/s][A
Training:  29%|██▉       | 49/169 [00:11<00:27,  4.41it/s][A
Training:  31%|███       | 52/169 [00:11<00:19,  5.87it/s][A
Training:  32%|███▏

Epoch: 13/44 - Loss: 4.6855 - Accuracy: 0.9513



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:24,  1.16s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.86it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.73it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.15it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.41it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.14it/s][A
Epochs:  30%|██▉       | 13/44 [08:58<21:21, 41.35s/it]

Val Loss: 5.4704 - Val Accuracy: 0.9414



Training:   0%|          | 0/169 [00:00<?, ?it/s][A
Training:   1%|          | 1/169 [00:01<03:18,  1.18s/it][A
Training:   3%|▎         | 5/169 [00:02<00:58,  2.81it/s][A
Training:   5%|▌         | 9/169 [00:02<00:43,  3.65it/s][A
Training:   8%|▊         | 13/169 [00:03<00:38,  4.07it/s][A
Training:  10%|█         | 17/169 [00:04<00:35,  4.32it/s][A
Training:  12%|█▏        | 21/169 [00:05<00:33,  4.47it/s][A
Training:  15%|█▍        | 25/169 [00:06<00:31,  4.58it/s][A
Training:  17%|█▋        | 29/169 [00:07<00:30,  4.66it/s][A
Training:  20%|█▉        | 33/169 [00:07<00:28,  4.71it/s][A
Training:  22%|██▏       | 37/169 [00:08<00:28,  4.68it/s][A
Training:  24%|██▍       | 41/169 [00:09<00:27,  4.71it/s][A
Training:  27%|██▋       | 45/169 [00:10<00:26,  4.72it/s][A
Training:  29%|██▉       | 49/169 [00:11<00:25,  4.73it/s][A
Training:  31%|███▏      | 53/169 [00:12<00:24,  4.69it/s][A
Training:  34%|███▎      | 57/169 [00:12<00:23,  4.74it/s][A
Training:  36%|███▌

Epoch: 14/44 - Loss: 4.4003 - Accuracy: 0.9511



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:24,  1.17s/it][A
 23%|██▎       | 5/22 [00:02<00:06,  2.80it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.60it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.05it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.30it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.04it/s][A
Epochs:  32%|███▏      | 14/44 [09:39<20:40, 41.36s/it]

Val Loss: 5.3637 - Val Accuracy: 0.9430



Training:   0%|          | 0/169 [00:00<?, ?it/s][A
Training:   1%|          | 1/169 [00:01<03:16,  1.17s/it][A
Training:   3%|▎         | 5/169 [00:02<00:57,  2.84it/s][A
Training:   5%|▌         | 9/169 [00:02<00:43,  3.69it/s][A
Training:   8%|▊         | 13/169 [00:03<00:37,  4.11it/s][A
Training:  10%|█         | 17/169 [00:04<00:35,  4.33it/s][A
Training:  12%|█▏        | 21/169 [00:05<00:32,  4.50it/s][A
Training:  15%|█▍        | 25/169 [00:06<00:31,  4.61it/s][A
Training:  17%|█▋        | 29/169 [00:06<00:29,  4.68it/s][A
Training:  20%|█▉        | 33/169 [00:07<00:28,  4.74it/s][A
Training:  22%|██▏       | 37/169 [00:08<00:27,  4.74it/s][A
Training:  24%|██▍       | 41/169 [00:09<00:27,  4.74it/s][A
Training:  27%|██▋       | 45/169 [00:10<00:26,  4.73it/s][A
Training:  29%|██▉       | 49/169 [00:11<00:25,  4.74it/s][A
Training:  31%|███▏      | 53/169 [00:12<00:24,  4.76it/s][A
Training:  34%|███▎      | 57/169 [00:12<00:23,  4.77it/s][A
Training:  36%|███▌

Epoch: 15/44 - Loss: 4.4902 - Accuracy: 0.9511



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:24,  1.15s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.85it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.68it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.10it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.36it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.08it/s][A
Epochs:  34%|███▍      | 15/44 [10:21<19:58, 41.33s/it]

Val Loss: 5.8488 - Val Accuracy: 0.9403



Training:   0%|          | 0/169 [00:00<?, ?it/s][A
Training:   1%|          | 1/169 [00:01<03:15,  1.16s/it][A
Training:   3%|▎         | 5/169 [00:01<00:56,  2.90it/s][A
Training:   5%|▌         | 9/169 [00:02<00:42,  3.75it/s][A
Training:   8%|▊         | 13/169 [00:03<00:37,  4.20it/s][A
Training:  10%|█         | 17/169 [00:04<00:34,  4.42it/s][A
Training:  12%|█▏        | 21/169 [00:05<00:32,  4.56it/s][A
Training:  15%|█▍        | 25/169 [00:06<00:30,  4.66it/s][A
Training:  17%|█▋        | 29/169 [00:06<00:29,  4.70it/s][A
Training:  20%|█▉        | 33/169 [00:07<00:28,  4.74it/s][A
Training:  22%|██▏       | 37/169 [00:08<00:27,  4.76it/s][A
Training:  24%|██▍       | 41/169 [00:09<00:26,  4.75it/s][A
Training:  27%|██▋       | 45/169 [00:10<00:26,  4.73it/s][A
Training:  29%|██▉       | 49/169 [00:11<00:25,  4.76it/s][A
Training:  31%|███▏      | 53/169 [00:11<00:24,  4.74it/s][A
Training:  34%|███▎      | 57/169 [00:12<00:23,  4.73it/s][A
Training:  36%|███▌

Epoch: 16/44 - Loss: 4.7163 - Accuracy: 0.9517



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:24,  1.14s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.87it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.70it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.12it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.34it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.11it/s][A
Epochs:  36%|███▋      | 16/44 [11:01<19:14, 41.21s/it]

Val Loss: 5.8677 - Val Accuracy: 0.9419



Training:   0%|          | 0/169 [00:00<?, ?it/s][A
Training:   1%|          | 1/169 [00:01<03:21,  1.20s/it][A
Training:   3%|▎         | 5/169 [00:02<00:57,  2.83it/s][A
Training:   5%|▌         | 9/169 [00:02<00:44,  3.64it/s][A
Training:   8%|▊         | 13/169 [00:03<00:38,  4.10it/s][A
Training:  10%|█         | 17/169 [00:04<00:34,  4.35it/s][A
Training:  12%|█▏        | 21/169 [00:05<00:33,  4.48it/s][A
Training:  15%|█▍        | 25/169 [00:06<00:31,  4.58it/s][A
Training:  17%|█▋        | 29/169 [00:07<00:30,  4.62it/s][A
Training:  20%|█▉        | 33/169 [00:07<00:29,  4.68it/s][A
Training:  22%|██▏       | 37/169 [00:08<00:28,  4.69it/s][A
Training:  24%|██▍       | 41/169 [00:09<00:27,  4.74it/s][A
Training:  27%|██▋       | 45/169 [00:10<00:26,  4.73it/s][A
Training:  29%|██▉       | 49/169 [00:11<00:25,  4.78it/s][A
Training:  31%|███▏      | 53/169 [00:12<00:24,  4.81it/s][A
Training:  34%|███▎      | 57/169 [00:12<00:23,  4.84it/s][A
Training:  36%|███▌

Epoch: 17/44 - Loss: 4.5340 - Accuracy: 0.9533



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:24,  1.15s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.88it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.67it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.08it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.33it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.08it/s][A
Epochs:  39%|███▊      | 17/44 [11:43<18:32, 41.20s/it]

Val Loss: 5.9080 - Val Accuracy: 0.9414



Training:   0%|          | 0/169 [00:00<?, ?it/s][A
Training:   1%|          | 1/169 [00:01<03:17,  1.17s/it][A
Training:   3%|▎         | 5/169 [00:02<00:58,  2.80it/s][A
Training:   5%|▌         | 9/169 [00:02<00:43,  3.64it/s][A
Training:   8%|▊         | 13/169 [00:03<00:37,  4.11it/s][A
Training:  10%|█         | 17/169 [00:04<00:34,  4.38it/s][A
Training:  12%|█▏        | 21/169 [00:05<00:32,  4.56it/s][A
Training:  15%|█▍        | 25/169 [00:06<00:31,  4.63it/s][A
Training:  17%|█▋        | 29/169 [00:06<00:29,  4.68it/s][A
Training:  20%|█▉        | 33/169 [00:07<00:28,  4.72it/s][A
Training:  22%|██▏       | 37/169 [00:08<00:27,  4.77it/s][A
Training:  24%|██▍       | 41/169 [00:09<00:26,  4.81it/s][A
Training:  27%|██▋       | 45/169 [00:10<00:25,  4.84it/s][A
Training:  29%|██▉       | 49/169 [00:11<00:24,  4.84it/s][A
Training:  31%|███▏      | 53/169 [00:11<00:23,  4.86it/s][A
Training:  34%|███▎      | 57/169 [00:12<00:23,  4.85it/s][A
Training:  36%|███▌

Epoch: 18/44 - Loss: 4.7936 - Accuracy: 0.9538



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:23,  1.12s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.89it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.72it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.12it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.41it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.15it/s][A
Epochs:  41%|████      | 18/44 [12:23<17:48, 41.08s/it]

Val Loss: 6.1281 - Val Accuracy: 0.9414



Training:   0%|          | 0/169 [00:00<?, ?it/s][A
Training:   1%|          | 1/169 [00:01<03:09,  1.13s/it][A
Training:   3%|▎         | 5/169 [00:01<00:56,  2.90it/s][A
Training:   5%|▌         | 9/169 [00:02<00:43,  3.69it/s][A
Training:   8%|▊         | 13/169 [00:03<00:37,  4.14it/s][A
Training:  10%|█         | 17/169 [00:04<00:34,  4.39it/s][A
Training:  12%|█▏        | 21/169 [00:05<00:32,  4.51it/s][A
Training:  15%|█▍        | 25/169 [00:06<00:31,  4.61it/s][A
Training:  17%|█▋        | 29/169 [00:06<00:29,  4.68it/s][A
Training:  20%|█▉        | 33/169 [00:07<00:28,  4.72it/s][A
Training:  22%|██▏       | 37/169 [00:08<00:27,  4.73it/s][A
Training:  24%|██▍       | 41/169 [00:09<00:26,  4.78it/s][A
Training:  27%|██▋       | 45/169 [00:10<00:25,  4.81it/s][A
Training:  29%|██▉       | 49/169 [00:11<00:24,  4.84it/s][A
Training:  31%|███▏      | 53/169 [00:11<00:23,  4.86it/s][A
Training:  34%|███▎      | 57/169 [00:12<00:22,  4.90it/s][A
Training:  36%|███▌

Epoch: 19/44 - Loss: 4.7515 - Accuracy: 0.9542



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:24,  1.15s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.86it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.69it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.10it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.34it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.09it/s][A
Epochs:  43%|████▎     | 19/44 [13:04<17:05, 41.01s/it]

Val Loss: 6.7000 - Val Accuracy: 0.9441



Training:   0%|          | 0/169 [00:00<?, ?it/s][A
Training:   1%|          | 1/169 [00:01<03:13,  1.15s/it][A
Training:   3%|▎         | 5/169 [00:01<00:57,  2.84it/s][A
Training:   5%|▌         | 9/169 [00:02<00:43,  3.68it/s][A
Training:   8%|▊         | 13/169 [00:03<00:38,  4.05it/s][A
Training:  10%|█         | 17/169 [00:04<00:35,  4.30it/s][A
Training:  12%|█▏        | 21/169 [00:05<00:33,  4.42it/s][A
Training:  15%|█▍        | 25/169 [00:06<00:32,  4.50it/s][A
Training:  17%|█▋        | 29/169 [00:07<00:30,  4.57it/s][A
Training:  20%|█▉        | 33/169 [00:07<00:29,  4.63it/s][A
Training:  22%|██▏       | 37/169 [00:08<00:28,  4.70it/s][A
Training:  24%|██▍       | 41/169 [00:09<00:26,  4.75it/s][A
Training:  27%|██▋       | 45/169 [00:10<00:26,  4.77it/s][A
Training:  29%|██▉       | 49/169 [00:11<00:24,  4.80it/s][A
Training:  31%|███▏      | 53/169 [00:12<00:24,  4.80it/s][A
Training:  34%|███▎      | 57/169 [00:12<00:23,  4.80it/s][A
Training:  36%|███▌

Epoch: 20/44 - Loss: 4.8919 - Accuracy: 0.9542



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:23,  1.13s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.91it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.74it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.14it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.40it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.13it/s][A
Epochs:  45%|████▌     | 20/44 [13:45<16:24, 41.04s/it]

Val Loss: 5.4085 - Val Accuracy: 0.9485



Training:   0%|          | 0/169 [00:00<?, ?it/s][A
Training:   1%|          | 1/169 [00:01<03:13,  1.15s/it][A
Training:   3%|▎         | 5/169 [00:01<00:57,  2.85it/s][A
Training:   5%|▌         | 9/169 [00:02<00:43,  3.68it/s][A
Training:   8%|▊         | 13/169 [00:03<00:38,  4.08it/s][A
Training:  10%|█         | 17/169 [00:04<00:35,  4.34it/s][A
Training:  12%|█▏        | 21/169 [00:05<00:32,  4.51it/s][A
Training:  15%|█▍        | 25/169 [00:06<00:31,  4.59it/s][A
Training:  17%|█▋        | 29/169 [00:07<00:30,  4.62it/s][A
Training:  20%|█▉        | 33/169 [00:07<00:29,  4.68it/s][A
Training:  22%|██▏       | 37/169 [00:08<00:28,  4.68it/s][A
Training:  24%|██▍       | 41/169 [00:09<00:26,  4.74it/s][A
Training:  27%|██▋       | 45/169 [00:10<00:25,  4.80it/s][A
Training:  29%|██▉       | 49/169 [00:11<00:24,  4.81it/s][A
Training:  31%|███▏      | 53/169 [00:11<00:24,  4.83it/s][A
Training:  34%|███▎      | 57/169 [00:12<00:23,  4.84it/s][A
Training:  36%|███▌

Epoch: 21/44 - Loss: 4.8936 - Accuracy: 0.9533



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:23,  1.14s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.84it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.63it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.03it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.30it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.07it/s][A
Epochs:  48%|████▊     | 21/44 [14:26<15:43, 41.02s/it]

Val Loss: 7.2409 - Val Accuracy: 0.9419



Training:   0%|          | 0/169 [00:00<?, ?it/s][A
Training:   1%|          | 1/169 [00:01<03:16,  1.17s/it][A
Training:   3%|▎         | 5/169 [00:02<00:57,  2.85it/s][A
Training:   5%|▌         | 9/169 [00:02<00:43,  3.66it/s][A
Training:   8%|▊         | 13/169 [00:03<00:38,  4.06it/s][A
Training:  10%|█         | 17/169 [00:04<00:34,  4.36it/s][A
Training:  12%|█▏        | 21/169 [00:05<00:32,  4.49it/s][A
Training:  15%|█▍        | 25/169 [00:06<00:31,  4.60it/s][A
Training:  17%|█▋        | 29/169 [00:07<00:30,  4.66it/s][A
Training:  20%|█▉        | 33/169 [00:07<00:29,  4.68it/s][A
Training:  22%|██▏       | 37/169 [00:08<00:27,  4.72it/s][A
Training:  24%|██▍       | 41/169 [00:09<00:26,  4.78it/s][A
Training:  27%|██▋       | 45/169 [00:10<00:25,  4.78it/s][A
Training:  29%|██▉       | 49/169 [00:11<00:25,  4.80it/s][A
Training:  31%|███▏      | 53/169 [00:11<00:24,  4.80it/s][A
Training:  34%|███▎      | 57/169 [00:12<00:23,  4.83it/s][A
Training:  36%|███▌

Epoch: 22/44 - Loss: 4.8878 - Accuracy: 0.9542



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:23,  1.10s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.92it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.74it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.16it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.43it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.16it/s][A
Epochs:  50%|█████     | 22/44 [15:07<15:00, 40.95s/it]

Val Loss: 6.6921 - Val Accuracy: 0.9474



Training:   0%|          | 0/169 [00:00<?, ?it/s][A
Training:   1%|          | 1/169 [00:01<03:09,  1.13s/it][A
Training:   3%|▎         | 5/169 [00:01<00:56,  2.91it/s][A
Training:   5%|▌         | 9/169 [00:02<00:42,  3.73it/s][A
Training:   8%|▊         | 13/169 [00:03<00:37,  4.18it/s][A
Training:  10%|█         | 17/169 [00:04<00:34,  4.43it/s][A
Training:  12%|█▏        | 21/169 [00:05<00:32,  4.61it/s][A
Training:  15%|█▍        | 25/169 [00:06<00:30,  4.66it/s][A
Training:  17%|█▋        | 29/169 [00:06<00:29,  4.74it/s][A
Training:  20%|█▉        | 33/169 [00:07<00:28,  4.77it/s][A
Training:  22%|██▏       | 37/169 [00:08<00:27,  4.78it/s][A
Training:  24%|██▍       | 41/169 [00:09<00:26,  4.83it/s][A
Training:  27%|██▋       | 45/169 [00:10<00:25,  4.84it/s][A
Training:  29%|██▉       | 49/169 [00:10<00:24,  4.83it/s][A
Training:  31%|███▏      | 53/169 [00:11<00:23,  4.85it/s][A
Training:  34%|███▎      | 57/169 [00:12<00:23,  4.85it/s][A
Training:  36%|███▌

Epoch: 23/44 - Loss: 4.9804 - Accuracy: 0.9539



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:22,  1.09s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.98it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.83it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.25it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.47it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.21it/s][A
Epochs:  52%|█████▏    | 23/44 [15:48<14:17, 40.83s/it]

Val Loss: 5.9590 - Val Accuracy: 0.9436



Training:   0%|          | 0/169 [00:00<?, ?it/s][A
Training:   1%|          | 1/169 [00:01<03:19,  1.19s/it][A
Training:   3%|▎         | 5/169 [00:02<00:58,  2.80it/s][A
Training:   5%|▌         | 9/169 [00:02<00:43,  3.64it/s][A
Training:   8%|▊         | 13/169 [00:03<00:38,  4.08it/s][A
Training:  10%|█         | 17/169 [00:04<00:34,  4.36it/s][A
Training:  12%|█▏        | 21/169 [00:05<00:32,  4.55it/s][A
Training:  15%|█▍        | 25/169 [00:06<00:30,  4.68it/s][A
Training:  17%|█▋        | 29/169 [00:06<00:29,  4.76it/s][A
Training:  20%|█▉        | 33/169 [00:07<00:28,  4.77it/s][A
Training:  22%|██▏       | 37/169 [00:08<00:27,  4.79it/s][A
Training:  24%|██▍       | 41/169 [00:09<00:26,  4.81it/s][A
Training:  27%|██▋       | 45/169 [00:10<00:25,  4.82it/s][A
Training:  29%|██▉       | 49/169 [00:11<00:24,  4.83it/s][A
Training:  31%|███▏      | 53/169 [00:11<00:23,  4.84it/s][A
Training:  34%|███▎      | 57/169 [00:12<00:23,  4.86it/s][A
Training:  36%|███▌

Epoch: 24/44 - Loss: 4.7798 - Accuracy: 0.9552



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:23,  1.12s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.94it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.80it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.21it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.46it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.18it/s][A
Epochs:  55%|█████▍    | 24/44 [16:28<13:35, 40.79s/it]

Val Loss: 6.6344 - Val Accuracy: 0.9447



Training:   0%|          | 0/169 [00:00<?, ?it/s][A
Training:   1%|          | 1/169 [00:01<03:04,  1.10s/it][A
Training:   3%|▎         | 5/169 [00:01<00:55,  2.95it/s][A
Training:   5%|▌         | 9/169 [00:02<00:42,  3.76it/s][A
Training:   8%|▊         | 13/169 [00:03<00:37,  4.16it/s][A
Training:  10%|█         | 17/169 [00:04<00:34,  4.40it/s][A
Training:  12%|█▏        | 21/169 [00:05<00:32,  4.56it/s][A
Training:  15%|█▍        | 25/169 [00:06<00:30,  4.67it/s][A
Training:  17%|█▋        | 29/169 [00:06<00:29,  4.68it/s][A
Training:  20%|█▉        | 33/169 [00:07<00:28,  4.77it/s][A
Training:  22%|██▏       | 37/169 [00:08<00:27,  4.80it/s][A
Training:  24%|██▍       | 41/169 [00:09<00:26,  4.81it/s][A
Training:  27%|██▋       | 45/169 [00:10<00:25,  4.81it/s][A
Training:  29%|██▉       | 49/169 [00:11<00:24,  4.82it/s][A
Training:  31%|███▏      | 53/169 [00:11<00:23,  4.84it/s][A
Training:  34%|███▎      | 57/169 [00:12<00:23,  4.83it/s][A
Training:  36%|███▌

Epoch: 25/44 - Loss: 4.7473 - Accuracy: 0.9548



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:22,  1.09s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.96it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.78it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.29it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.50it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.20it/s][A
Epochs:  57%|█████▋    | 25/44 [17:09<12:53, 40.72s/it]

Val Loss: 6.0367 - Val Accuracy: 0.9441



Training:   0%|          | 0/169 [00:00<?, ?it/s][A
Training:   1%|          | 1/169 [00:01<03:14,  1.16s/it][A
Training:   3%|▎         | 5/169 [00:01<00:57,  2.87it/s][A
Training:   5%|▌         | 9/169 [00:02<00:43,  3.69it/s][A
Training:   8%|▊         | 13/169 [00:03<00:37,  4.13it/s][A
Training:  10%|█         | 17/169 [00:04<00:34,  4.37it/s][A
Training:  12%|█▏        | 21/169 [00:05<00:32,  4.56it/s][A
Training:  15%|█▍        | 25/169 [00:06<00:30,  4.65it/s][A
Training:  17%|█▋        | 29/169 [00:06<00:29,  4.73it/s][A
Training:  20%|█▉        | 33/169 [00:07<00:28,  4.78it/s][A
Training:  22%|██▏       | 37/169 [00:08<00:27,  4.81it/s][A
Training:  24%|██▍       | 41/169 [00:09<00:26,  4.80it/s][A
Training:  27%|██▋       | 45/169 [00:10<00:25,  4.81it/s][A
Training:  29%|██▉       | 49/169 [00:11<00:24,  4.83it/s][A
Training:  31%|███       | 52/169 [00:11<00:19,  6.07it/s][A
Training:  32%|███▏      | 54/169 [00:11<00:23,  4.84it/s][A
Training:  34%|███▎

Epoch: 26/44 - Loss: 4.6659 - Accuracy: 0.9547



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:24,  1.17s/it][A
 23%|██▎       | 5/22 [00:02<00:05,  2.85it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.67it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.12it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.30it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.04it/s][A
Epochs:  59%|█████▉    | 26/44 [17:50<12:13, 40.76s/it]

Val Loss: 6.7956 - Val Accuracy: 0.9425



Training:   0%|          | 0/169 [00:00<?, ?it/s][A
Training:   1%|          | 1/169 [00:01<03:15,  1.16s/it][A
Training:   3%|▎         | 5/169 [00:02<00:57,  2.83it/s][A
Training:   5%|▌         | 9/169 [00:02<00:44,  3.58it/s][A
Training:   8%|▊         | 13/169 [00:03<00:39,  3.99it/s][A
Training:  10%|█         | 17/169 [00:04<00:35,  4.29it/s][A
Training:  12%|█▏        | 21/169 [00:05<00:33,  4.45it/s][A
Training:  15%|█▍        | 25/169 [00:06<00:31,  4.56it/s][A
Training:  17%|█▋        | 29/169 [00:07<00:30,  4.64it/s][A
Training:  20%|█▉        | 33/169 [00:07<00:28,  4.71it/s][A
Training:  22%|██▏       | 37/169 [00:08<00:27,  4.75it/s][A
Training:  24%|██▍       | 41/169 [00:09<00:26,  4.76it/s][A
Training:  27%|██▋       | 45/169 [00:10<00:26,  4.76it/s][A
Training:  29%|██▉       | 49/169 [00:11<00:25,  4.74it/s][A
Training:  31%|███▏      | 53/169 [00:12<00:24,  4.76it/s][A
Training:  34%|███▎      | 57/169 [00:12<00:23,  4.79it/s][A
Training:  36%|███▌

Epoch: 27/44 - Loss: 4.8266 - Accuracy: 0.9552



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:24,  1.15s/it][A
 23%|██▎       | 5/22 [00:02<00:05,  2.84it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.67it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.10it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.35it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.07it/s][A
Epochs:  61%|██████▏   | 27/44 [18:32<11:40, 41.18s/it]

Val Loss: 6.4642 - Val Accuracy: 0.9469



Training:   0%|          | 0/169 [00:00<?, ?it/s][A
Training:   1%|          | 1/169 [00:01<03:13,  1.15s/it][A
Training:   3%|▎         | 5/169 [00:02<00:58,  2.83it/s][A
Training:   5%|▌         | 9/169 [00:02<00:43,  3.64it/s][A
Training:   8%|▊         | 13/169 [00:03<00:38,  4.01it/s][A
Training:  10%|█         | 17/169 [00:04<00:35,  4.23it/s][A
Training:  12%|█▏        | 21/169 [00:05<00:33,  4.38it/s][A
Training:  15%|█▍        | 25/169 [00:06<00:32,  4.42it/s][A
Training:  17%|█▋        | 29/169 [00:07<00:30,  4.55it/s][A
Training:  20%|█▉        | 33/169 [00:07<00:29,  4.60it/s][A
Training:  22%|██▏       | 37/169 [00:08<00:28,  4.66it/s][A
Training:  24%|██▍       | 41/169 [00:09<00:27,  4.69it/s][A
Training:  27%|██▋       | 45/169 [00:10<00:26,  4.72it/s][A
Training:  29%|██▉       | 49/169 [00:11<00:25,  4.71it/s][A
Training:  31%|███▏      | 53/169 [00:12<00:24,  4.74it/s][A
Training:  34%|███▎      | 57/169 [00:13<00:23,  4.71it/s][A
Training:  36%|███▌

Epoch: 28/44 - Loss: 5.0380 - Accuracy: 0.9558



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:24,  1.15s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.85it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.65it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.06it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.27it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.05it/s][A
Epochs:  64%|██████▎   | 28/44 [19:14<11:01, 41.35s/it]

Val Loss: 7.2090 - Val Accuracy: 0.9403



Training:   0%|          | 0/169 [00:00<?, ?it/s][A
Training:   1%|          | 1/169 [00:01<03:14,  1.16s/it][A
Training:   3%|▎         | 5/169 [00:02<00:58,  2.80it/s][A
Training:   5%|▌         | 9/169 [00:02<00:44,  3.61it/s][A
Training:   8%|▊         | 13/169 [00:03<00:38,  4.05it/s][A
Training:  10%|█         | 17/169 [00:04<00:35,  4.26it/s][A
Training:  12%|█▏        | 21/169 [00:05<00:33,  4.40it/s][A
Training:  15%|█▍        | 25/169 [00:06<00:31,  4.53it/s][A
Training:  17%|█▋        | 29/169 [00:07<00:30,  4.59it/s][A
Training:  20%|█▉        | 33/169 [00:07<00:29,  4.58it/s][A
Training:  22%|██▏       | 37/169 [00:08<00:28,  4.62it/s][A
Training:  24%|██▍       | 41/169 [00:09<00:27,  4.63it/s][A
Training:  27%|██▋       | 45/169 [00:10<00:26,  4.72it/s][A
Training:  29%|██▉       | 49/169 [00:11<00:25,  4.74it/s][A
Training:  31%|███▏      | 53/169 [00:12<00:24,  4.77it/s][A
Training:  34%|███▎      | 57/169 [00:13<00:23,  4.74it/s][A
Training:  36%|███▌

Epoch: 29/44 - Loss: 4.8520 - Accuracy: 0.9556



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:23,  1.13s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.89it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.72it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.12it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.40it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.13it/s][A
Epochs:  66%|██████▌   | 29/44 [19:55<10:20, 41.34s/it]

Val Loss: 7.0977 - Val Accuracy: 0.9469



Training:   0%|          | 0/169 [00:00<?, ?it/s][A
Training:   1%|          | 1/169 [00:01<03:19,  1.19s/it][A
Training:   3%|▎         | 5/169 [00:02<00:58,  2.81it/s][A
Training:   5%|▌         | 9/169 [00:02<00:44,  3.61it/s][A
Training:   8%|▊         | 13/169 [00:03<00:38,  4.04it/s][A
Training:  10%|█         | 17/169 [00:04<00:35,  4.33it/s][A
Training:  12%|█▏        | 21/169 [00:05<00:33,  4.48it/s][A
Training:  15%|█▍        | 25/169 [00:06<00:31,  4.58it/s][A
Training:  17%|█▋        | 29/169 [00:07<00:30,  4.61it/s][A
Training:  20%|█▉        | 33/169 [00:07<00:29,  4.61it/s][A
Training:  22%|██▏       | 37/169 [00:08<00:28,  4.66it/s][A
Training:  24%|██▍       | 41/169 [00:09<00:27,  4.73it/s][A
Training:  27%|██▋       | 45/169 [00:10<00:26,  4.75it/s][A
Training:  29%|██▉       | 49/169 [00:11<00:25,  4.75it/s][A
Training:  31%|███▏      | 53/169 [00:12<00:24,  4.79it/s][A
Training:  34%|███▎      | 57/169 [00:12<00:23,  4.79it/s][A
Training:  36%|███▌

Epoch: 30/44 - Loss: 4.8553 - Accuracy: 0.9571



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:23,  1.12s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.87it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.69it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.17it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.38it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.13it/s][A
Epochs:  68%|██████▊   | 30/44 [20:36<09:38, 41.33s/it]

Val Loss: 5.8565 - Val Accuracy: 0.9524



Training:   0%|          | 0/169 [00:00<?, ?it/s][A
Training:   1%|          | 1/169 [00:01<03:13,  1.15s/it][A
Training:   3%|▎         | 5/169 [00:02<00:57,  2.83it/s][A
Training:   5%|▌         | 9/169 [00:02<00:43,  3.69it/s][A
Training:   8%|▊         | 13/169 [00:03<00:37,  4.14it/s][A
Training:  10%|█         | 17/169 [00:04<00:34,  4.39it/s][A
Training:  12%|█▏        | 21/169 [00:05<00:32,  4.53it/s][A
Training:  15%|█▍        | 25/169 [00:06<00:30,  4.67it/s][A
Training:  17%|█▋        | 29/169 [00:06<00:29,  4.71it/s][A
Training:  20%|█▉        | 33/169 [00:07<00:28,  4.76it/s][A
Training:  22%|██▏       | 37/169 [00:08<00:27,  4.83it/s][A
Training:  24%|██▍       | 41/169 [00:09<00:26,  4.85it/s][A
Training:  27%|██▋       | 45/169 [00:10<00:25,  4.86it/s][A
Training:  29%|██▉       | 49/169 [00:11<00:24,  4.85it/s][A
Training:  31%|███▏      | 53/169 [00:11<00:23,  4.86it/s][A
Training:  34%|███▎      | 57/169 [00:12<00:22,  4.87it/s][A
Training:  36%|███▌

Epoch: 31/44 - Loss: 4.8966 - Accuracy: 0.9557



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:23,  1.12s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.93it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.72it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.17it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.42it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.14it/s][A
Epochs:  70%|███████   | 31/44 [21:17<08:54, 41.13s/it]

Val Loss: 5.6547 - Val Accuracy: 0.9381



Training:   0%|          | 0/169 [00:00<?, ?it/s][A
Training:   1%|          | 1/169 [00:01<03:14,  1.16s/it][A
Training:   3%|▎         | 5/169 [00:02<00:58,  2.79it/s][A
Training:   5%|▌         | 9/169 [00:02<00:44,  3.63it/s][A
Training:   8%|▊         | 13/169 [00:03<00:38,  4.08it/s][A
Training:  10%|█         | 17/169 [00:04<00:34,  4.36it/s][A
Training:  12%|█▏        | 21/169 [00:05<00:32,  4.55it/s][A
Training:  15%|█▍        | 25/169 [00:06<00:30,  4.68it/s][A
Training:  17%|█▋        | 29/169 [00:06<00:29,  4.72it/s][A
Training:  20%|█▉        | 33/169 [00:07<00:28,  4.71it/s][A
Training:  22%|██▏       | 37/169 [00:08<00:27,  4.77it/s][A
Training:  24%|██▍       | 41/169 [00:09<00:26,  4.82it/s][A
Training:  27%|██▋       | 45/169 [00:10<00:25,  4.85it/s][A
Training:  28%|██▊       | 48/169 [00:10<00:19,  6.09it/s][A
Training:  30%|██▉       | 50/169 [00:11<00:24,  4.86it/s][A
Training:  31%|███▏      | 53/169 [00:11<00:25,  4.52it/s][A
Training:  34%|███▎

Epoch: 32/44 - Loss: 4.6676 - Accuracy: 0.9545



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:24,  1.16s/it][A
 23%|██▎       | 5/22 [00:02<00:06,  2.83it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.65it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.10it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.35it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.07it/s][A
Epochs:  73%|███████▎  | 32/44 [21:58<08:12, 41.06s/it]

Val Loss: 6.1068 - Val Accuracy: 0.9469



Training:   0%|          | 0/169 [00:00<?, ?it/s][A
Training:   1%|          | 1/169 [00:01<03:15,  1.16s/it][A
Training:   3%|▎         | 5/169 [00:01<00:57,  2.87it/s][A
Training:   5%|▌         | 9/169 [00:02<00:43,  3.69it/s][A
Training:   8%|▊         | 13/169 [00:03<00:37,  4.13it/s][A
Training:  10%|█         | 17/169 [00:04<00:34,  4.39it/s][A
Training:  12%|█▏        | 21/169 [00:05<00:32,  4.54it/s][A
Training:  14%|█▍        | 24/169 [00:05<00:24,  5.90it/s][A
Training:  15%|█▌        | 26/169 [00:06<00:30,  4.66it/s][A
Training:  17%|█▋        | 29/169 [00:06<00:31,  4.39it/s][A
Training:  19%|█▉        | 32/169 [00:07<00:23,  5.93it/s][A
Training:  20%|██        | 34/169 [00:07<00:29,  4.53it/s][A
Training:  22%|██▏       | 37/169 [00:08<00:30,  4.31it/s][A
Training:  24%|██▎       | 40/169 [00:08<00:21,  5.91it/s][A
Training:  25%|██▍       | 42/169 [00:09<00:28,  4.52it/s][A
Training:  27%|██▋       | 45/169 [00:10<00:28,  4.28it/s][A
Training:  28%|██▊ 

Epoch: 33/44 - Loss: 5.0266 - Accuracy: 0.9574



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:23,  1.11s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.88it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.68it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.08it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.32it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.07it/s][A
Epochs:  75%|███████▌  | 33/44 [22:39<07:31, 41.03s/it]

Val Loss: 5.7093 - Val Accuracy: 0.9474



Training:   0%|          | 0/169 [00:00<?, ?it/s][A
Training:   1%|          | 1/169 [00:01<03:18,  1.18s/it][A
Training:   3%|▎         | 5/169 [00:02<00:59,  2.76it/s][A
Training:   5%|▌         | 9/169 [00:02<00:44,  3.56it/s][A
Training:   8%|▊         | 13/169 [00:03<00:38,  4.03it/s][A
Training:  10%|█         | 17/169 [00:04<00:35,  4.31it/s][A
Training:  12%|█▏        | 21/169 [00:05<00:33,  4.47it/s][A
Training:  15%|█▍        | 25/169 [00:06<00:31,  4.59it/s][A
Training:  17%|█▋        | 29/169 [00:07<00:30,  4.66it/s][A
Training:  20%|█▉        | 33/169 [00:07<00:28,  4.70it/s][A
Training:  22%|██▏       | 37/169 [00:08<00:28,  4.69it/s][A
Training:  24%|██▍       | 41/169 [00:09<00:27,  4.69it/s][A
Training:  27%|██▋       | 45/169 [00:10<00:26,  4.65it/s][A
Training:  29%|██▉       | 49/169 [00:11<00:25,  4.67it/s][A
Training:  31%|███▏      | 53/169 [00:12<00:24,  4.70it/s][A
Training:  34%|███▎      | 57/169 [00:13<00:23,  4.68it/s][A
Training:  36%|███▌

Epoch: 34/44 - Loss: 4.8728 - Accuracy: 0.9549



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:24,  1.19s/it][A
 23%|██▎       | 5/22 [00:02<00:06,  2.79it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.62it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.06it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.31it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.02it/s][A
Epochs:  77%|███████▋  | 34/44 [23:20<06:51, 41.19s/it]

Val Loss: 5.4341 - Val Accuracy: 0.9474



Training:   0%|          | 0/169 [00:00<?, ?it/s][A
Training:   1%|          | 1/169 [00:01<03:13,  1.15s/it][A
Training:   3%|▎         | 5/169 [00:02<00:58,  2.80it/s][A
Training:   5%|▌         | 9/169 [00:02<00:44,  3.62it/s][A
Training:   8%|▊         | 13/169 [00:03<00:38,  4.10it/s][A
Training:  10%|█         | 17/169 [00:04<00:34,  4.35it/s][A
Training:  12%|█▏        | 21/169 [00:05<00:33,  4.45it/s][A
Training:  15%|█▍        | 25/169 [00:06<00:31,  4.55it/s][A
Training:  17%|█▋        | 29/169 [00:07<00:30,  4.62it/s][A
Training:  20%|█▉        | 33/169 [00:07<00:29,  4.68it/s][A
Training:  22%|██▏       | 37/169 [00:08<00:27,  4.73it/s][A
Training:  24%|██▍       | 41/169 [00:09<00:26,  4.74it/s][A
Training:  27%|██▋       | 45/169 [00:10<00:25,  4.79it/s][A
Training:  29%|██▉       | 49/169 [00:11<00:24,  4.82it/s][A
Training:  31%|███▏      | 53/169 [00:12<00:24,  4.82it/s][A
Training:  34%|███▎      | 57/169 [00:12<00:23,  4.79it/s][A
Training:  36%|███▌

Epoch: 35/44 - Loss: 5.0197 - Accuracy: 0.9572



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:24,  1.16s/it][A
 23%|██▎       | 5/22 [00:02<00:05,  2.84it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.66it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.11it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.36it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.08it/s][A
Epochs:  80%|███████▉  | 35/44 [24:02<06:10, 41.20s/it]

Val Loss: 5.3544 - Val Accuracy: 0.9458



Training:   0%|          | 0/169 [00:00<?, ?it/s][A
Training:   1%|          | 1/169 [00:01<03:14,  1.16s/it][A
Training:   3%|▎         | 5/169 [00:02<00:59,  2.75it/s][A
Training:   5%|▌         | 9/169 [00:02<00:44,  3.56it/s][A
Training:   8%|▊         | 13/169 [00:03<00:38,  4.00it/s][A
Training:  10%|█         | 17/169 [00:04<00:35,  4.24it/s][A
Training:  12%|█▏        | 21/169 [00:05<00:34,  4.35it/s][A
Training:  15%|█▍        | 25/169 [00:06<00:32,  4.43it/s][A
Training:  17%|█▋        | 29/169 [00:07<00:31,  4.51it/s][A
Training:  20%|█▉        | 33/169 [00:08<00:29,  4.64it/s][A
Training:  22%|██▏       | 37/169 [00:08<00:28,  4.69it/s][A
Training:  24%|██▍       | 41/169 [00:09<00:27,  4.71it/s][A
Training:  27%|██▋       | 45/169 [00:10<00:26,  4.73it/s][A
Training:  29%|██▉       | 49/169 [00:11<00:25,  4.72it/s][A
Training:  31%|███▏      | 53/169 [00:12<00:24,  4.71it/s][A
Training:  34%|███▎      | 57/169 [00:13<00:23,  4.75it/s][A
Training:  36%|███▌

Epoch: 36/44 - Loss: 4.6650 - Accuracy: 0.9578



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:24,  1.17s/it][A
 23%|██▎       | 5/22 [00:02<00:05,  2.84it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.69it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.04it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.32it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.06it/s][A
Epochs:  82%|████████▏ | 36/44 [24:43<05:30, 41.29s/it]

Val Loss: 7.0000 - Val Accuracy: 0.9398



Training:   0%|          | 0/169 [00:00<?, ?it/s][A
Training:   1%|          | 1/169 [00:01<03:08,  1.12s/it][A
Training:   3%|▎         | 5/169 [00:01<00:56,  2.93it/s][A
Training:   5%|▌         | 9/169 [00:02<00:42,  3.73it/s][A
Training:   8%|▊         | 13/169 [00:03<00:37,  4.13it/s][A
Training:  10%|█         | 17/169 [00:04<00:34,  4.39it/s][A
Training:  12%|█▏        | 21/169 [00:05<00:32,  4.51it/s][A
Training:  15%|█▍        | 25/169 [00:06<00:31,  4.60it/s][A
Training:  17%|█▋        | 29/169 [00:06<00:29,  4.68it/s][A
Training:  20%|█▉        | 33/169 [00:07<00:29,  4.69it/s][A
Training:  22%|██▏       | 37/169 [00:08<00:28,  4.68it/s][A
Training:  24%|██▍       | 41/169 [00:09<00:27,  4.74it/s][A
Training:  27%|██▋       | 45/169 [00:10<00:25,  4.77it/s][A
Training:  29%|██▉       | 49/169 [00:11<00:25,  4.73it/s][A
Training:  31%|███       | 52/169 [00:11<00:19,  5.91it/s][A
Training:  32%|███▏      | 54/169 [00:12<00:23,  4.81it/s][A
Training:  34%|███▎

Epoch: 37/44 - Loss: 4.9691 - Accuracy: 0.9555



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:24,  1.18s/it][A
 23%|██▎       | 5/22 [00:02<00:05,  2.85it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.69it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.15it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.37it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.08it/s][A
Epochs:  84%|████████▍ | 37/44 [25:24<04:48, 41.20s/it]

Val Loss: 6.1885 - Val Accuracy: 0.9436



Training:   0%|          | 0/169 [00:00<?, ?it/s][A
Training:   1%|          | 1/169 [00:01<03:17,  1.18s/it][A
Training:   3%|▎         | 5/169 [00:02<00:58,  2.80it/s][A
Training:   5%|▌         | 9/169 [00:02<00:44,  3.61it/s][A
Training:   8%|▊         | 13/169 [00:03<00:38,  4.07it/s][A
Training:  10%|█         | 17/169 [00:04<00:35,  4.33it/s][A
Training:  12%|█▏        | 21/169 [00:05<00:33,  4.46it/s][A
Training:  15%|█▍        | 25/169 [00:06<00:31,  4.53it/s][A
Training:  17%|█▋        | 29/169 [00:07<00:30,  4.64it/s][A
Training:  20%|█▉        | 33/169 [00:07<00:28,  4.74it/s][A
Training:  22%|██▏       | 37/169 [00:08<00:27,  4.80it/s][A
Training:  24%|██▍       | 41/169 [00:09<00:26,  4.78it/s][A
Training:  27%|██▋       | 45/169 [00:10<00:25,  4.81it/s][A
Training:  29%|██▉       | 49/169 [00:11<00:24,  4.81it/s][A
Training:  31%|███▏      | 53/169 [00:11<00:23,  4.85it/s][A
Training:  34%|███▎      | 57/169 [00:12<00:23,  4.87it/s][A
Training:  36%|███▌

Epoch: 38/44 - Loss: 4.7446 - Accuracy: 0.9573



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:24,  1.15s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.87it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.76it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.18it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.44it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.17it/s][A
Epochs:  86%|████████▋ | 38/44 [26:05<04:06, 41.09s/it]

Val Loss: 7.9678 - Val Accuracy: 0.9458



Training:   0%|          | 0/169 [00:00<?, ?it/s][A
Training:   1%|          | 1/169 [00:01<03:19,  1.18s/it][A
Training:   3%|▎         | 5/169 [00:02<00:58,  2.81it/s][A
Training:   5%|▌         | 9/169 [00:02<00:44,  3.61it/s][A
Training:   8%|▊         | 13/169 [00:03<00:38,  4.04it/s][A
Training:  10%|█         | 17/169 [00:04<00:35,  4.28it/s][A
Training:  12%|█▏        | 21/169 [00:05<00:33,  4.48it/s][A
Training:  15%|█▍        | 25/169 [00:06<00:31,  4.53it/s][A
Training:  17%|█▋        | 29/169 [00:07<00:30,  4.59it/s][A
Training:  20%|█▉        | 33/169 [00:07<00:29,  4.68it/s][A
Training:  22%|██▏       | 37/169 [00:08<00:27,  4.74it/s][A
Training:  24%|██▍       | 41/169 [00:09<00:26,  4.79it/s][A
Training:  27%|██▋       | 45/169 [00:10<00:25,  4.81it/s][A
Training:  29%|██▉       | 49/169 [00:11<00:25,  4.78it/s][A
Training:  31%|███▏      | 53/169 [00:12<00:24,  4.79it/s][A
Training:  34%|███▎      | 57/169 [00:12<00:23,  4.80it/s][A
Training:  36%|███▌

Epoch: 39/44 - Loss: 4.5549 - Accuracy: 0.9570



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:24,  1.16s/it][A
 23%|██▎       | 5/22 [00:02<00:06,  2.83it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.71it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.18it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.46it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.15it/s][A
Epochs:  89%|████████▊ | 39/44 [26:46<03:25, 41.07s/it]

Val Loss: 9.0838 - Val Accuracy: 0.9419



Training:   0%|          | 0/169 [00:00<?, ?it/s][A
Training:   1%|          | 1/169 [00:01<03:11,  1.14s/it][A
Training:   3%|▎         | 5/169 [00:01<00:56,  2.88it/s][A
Training:   5%|▌         | 9/169 [00:02<00:43,  3.71it/s][A
Training:   8%|▊         | 13/169 [00:03<00:37,  4.16it/s][A
Training:  10%|█         | 17/169 [00:04<00:34,  4.44it/s][A
Training:  12%|█▏        | 21/169 [00:05<00:32,  4.60it/s][A
Training:  15%|█▍        | 25/169 [00:06<00:30,  4.65it/s][A
Training:  17%|█▋        | 29/169 [00:06<00:29,  4.67it/s][A
Training:  20%|█▉        | 33/169 [00:07<00:28,  4.72it/s][A
Training:  22%|██▏       | 37/169 [00:08<00:27,  4.77it/s][A
Training:  24%|██▍       | 41/169 [00:09<00:26,  4.81it/s][A
Training:  27%|██▋       | 45/169 [00:10<00:25,  4.83it/s][A
Training:  29%|██▉       | 49/169 [00:11<00:24,  4.82it/s][A
Training:  31%|███▏      | 53/169 [00:11<00:23,  4.84it/s][A
Training:  34%|███▎      | 57/169 [00:12<00:23,  4.82it/s][A
Training:  36%|███▌

Epoch: 40/44 - Loss: 4.9455 - Accuracy: 0.9582



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:23,  1.12s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.93it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.74it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.18it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.46it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.19it/s][A
Epochs:  91%|█████████ | 40/44 [27:27<02:43, 40.96s/it]

Val Loss: 7.5151 - Val Accuracy: 0.9392



Training:   0%|          | 0/169 [00:00<?, ?it/s][A
Training:   1%|          | 1/169 [00:01<03:13,  1.15s/it][A
Training:   3%|▎         | 5/169 [00:02<00:58,  2.81it/s][A
Training:   5%|▌         | 9/169 [00:02<00:43,  3.65it/s][A
Training:   8%|▊         | 13/169 [00:03<00:38,  4.09it/s][A
Training:  10%|█         | 17/169 [00:04<00:35,  4.26it/s][A
Training:  12%|█▏        | 21/169 [00:05<00:33,  4.45it/s][A
Training:  15%|█▍        | 25/169 [00:06<00:31,  4.52it/s][A
Training:  17%|█▋        | 29/169 [00:07<00:30,  4.60it/s][A
Training:  20%|█▉        | 33/169 [00:07<00:29,  4.67it/s][A
Training:  22%|██▏       | 37/169 [00:08<00:28,  4.70it/s][A
Training:  24%|██▍       | 41/169 [00:09<00:27,  4.71it/s][A
Training:  27%|██▋       | 45/169 [00:10<00:26,  4.74it/s][A
Training:  29%|██▉       | 49/169 [00:11<00:25,  4.73it/s][A
Training:  31%|███▏      | 53/169 [00:12<00:24,  4.73it/s][A
Training:  34%|███▎      | 57/169 [00:12<00:23,  4.79it/s][A
Training:  36%|███▌

Epoch: 41/44 - Loss: 4.9692 - Accuracy: 0.9572



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:24,  1.16s/it][A
 23%|██▎       | 5/22 [00:02<00:06,  2.83it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.64it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.05it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.30it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.04it/s][A
Epochs:  93%|█████████▎| 41/44 [28:08<02:03, 41.07s/it]

Val Loss: 7.1921 - Val Accuracy: 0.9310



Training:   0%|          | 0/169 [00:00<?, ?it/s][A
Training:   1%|          | 1/169 [00:01<03:16,  1.17s/it][A
Training:   3%|▎         | 5/169 [00:01<00:57,  2.86it/s][A
Training:   5%|▌         | 9/169 [00:02<00:43,  3.66it/s][A
Training:   8%|▊         | 13/169 [00:03<00:38,  4.09it/s][A
Training:  10%|█         | 17/169 [00:04<00:35,  4.31it/s][A
Training:  12%|█▏        | 21/169 [00:05<00:33,  4.47it/s][A
Training:  15%|█▍        | 25/169 [00:06<00:31,  4.58it/s][A
Training:  17%|█▋        | 29/169 [00:07<00:29,  4.67it/s][A
Training:  20%|█▉        | 33/169 [00:07<00:28,  4.69it/s][A
Training:  22%|██▏       | 37/169 [00:08<00:28,  4.69it/s][A
Training:  24%|██▍       | 41/169 [00:09<00:27,  4.73it/s][A
Training:  27%|██▋       | 45/169 [00:10<00:26,  4.75it/s][A
Training:  29%|██▉       | 49/169 [00:11<00:25,  4.77it/s][A
Training:  31%|███▏      | 53/169 [00:12<00:24,  4.78it/s][A
Training:  34%|███▎      | 57/169 [00:12<00:23,  4.81it/s][A
Training:  36%|███▌

Epoch: 42/44 - Loss: 4.7151 - Accuracy: 0.9578



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:24,  1.18s/it][A
 23%|██▎       | 5/22 [00:02<00:06,  2.79it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.63it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.04it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.35it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.06it/s][A
Epochs:  95%|█████████▌| 42/44 [28:49<01:22, 41.05s/it]

Val Loss: 5.8298 - Val Accuracy: 0.9291



Training:   0%|          | 0/169 [00:00<?, ?it/s][A
Training:   1%|          | 1/169 [00:01<03:16,  1.17s/it][A
Training:   3%|▎         | 5/169 [00:02<00:58,  2.82it/s][A
Training:   5%|▌         | 9/169 [00:02<00:43,  3.64it/s][A
Training:   8%|▊         | 13/169 [00:03<00:38,  4.09it/s][A
Training:  10%|█         | 17/169 [00:04<00:35,  4.33it/s][A
Training:  12%|█▏        | 21/169 [00:05<00:33,  4.43it/s][A
Training:  15%|█▍        | 25/169 [00:06<00:31,  4.52it/s][A
Training:  17%|█▋        | 29/169 [00:07<00:30,  4.64it/s][A
Training:  20%|█▉        | 33/169 [00:07<00:28,  4.71it/s][A
Training:  22%|██▏       | 37/169 [00:08<00:27,  4.74it/s][A
Training:  24%|██▍       | 41/169 [00:09<00:26,  4.77it/s][A
Training:  27%|██▋       | 45/169 [00:10<00:25,  4.81it/s][A
Training:  29%|██▉       | 49/169 [00:11<00:25,  4.79it/s][A
Training:  31%|███▏      | 53/169 [00:12<00:24,  4.81it/s][A
Training:  34%|███▎      | 57/169 [00:12<00:23,  4.82it/s][A
Training:  36%|███▌

Epoch: 43/44 - Loss: 4.8246 - Accuracy: 0.9582



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:24,  1.19s/it][A
 23%|██▎       | 5/22 [00:02<00:06,  2.82it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.68it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.12it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.39it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.10it/s][A
Epochs:  98%|█████████▊| 43/44 [29:30<00:41, 41.10s/it]

Val Loss: 6.3334 - Val Accuracy: 0.9447



Training:   0%|          | 0/169 [00:00<?, ?it/s][A
Training:   1%|          | 1/169 [00:01<03:07,  1.12s/it][A
Training:   3%|▎         | 5/169 [00:01<00:56,  2.92it/s][A
Training:   5%|▌         | 9/169 [00:02<00:42,  3.74it/s][A
Training:   8%|▊         | 13/169 [00:03<00:37,  4.17it/s][A
Training:  10%|█         | 17/169 [00:04<00:34,  4.39it/s][A
Training:  12%|█▏        | 21/169 [00:05<00:32,  4.53it/s][A
Training:  15%|█▍        | 25/169 [00:06<00:31,  4.63it/s][A
Training:  17%|█▋        | 29/169 [00:06<00:29,  4.69it/s][A
Training:  20%|█▉        | 33/169 [00:07<00:28,  4.74it/s][A
Training:  22%|██▏       | 37/169 [00:08<00:27,  4.74it/s][A
Training:  24%|██▍       | 41/169 [00:09<00:27,  4.69it/s][A
Training:  27%|██▋       | 45/169 [00:10<00:26,  4.73it/s][A
Training:  29%|██▉       | 49/169 [00:11<00:25,  4.72it/s][A
Training:  31%|███▏      | 53/169 [00:11<00:24,  4.78it/s][A
Training:  34%|███▎      | 57/169 [00:12<00:23,  4.78it/s][A
Training:  36%|███▌

Epoch: 44/44 - Loss: 4.9305 - Accuracy: 0.9582



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:24,  1.17s/it][A
 23%|██▎       | 5/22 [00:02<00:05,  2.84it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.68it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.12it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.40it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.11it/s][A
Epochs: 100%|██████████| 44/44 [30:11<00:00, 41.17s/it]
[32m[I 2023-12-07 10:35:56,629][0m Trial 20 finished with value: 0.9507119655609131 and parameters: {'learning_rate': 0.0005624769068457545, 'weight_decay': 0.0021223853891047125, 'epsilon': 1.7002291132922094e-08, 'batch_size': 83, 'epochs': 44}. Best is trial 20 with value: 0.9507119655609131.[0m


Val Loss: 6.2484 - Val Accuracy: 0.9507
Saving best model...
Learning rate: 0.0005392119649022928
Weight decay: 0.0008173502000658292
Epsilon: 1.6821016245819212e-08
Batch size: 81
Number of epochs: 41


Epochs:   0%|          | 0/41 [00:00<?, ?it/s]
Training:   0%|          | 0/173 [00:00<?, ?it/s][A
Training:   1%|          | 1/173 [00:01<03:18,  1.15s/it][A
Training:   3%|▎         | 5/173 [00:01<00:57,  2.92it/s][A
Training:   5%|▌         | 9/173 [00:02<00:43,  3.77it/s][A
Training:   8%|▊         | 13/173 [00:03<00:37,  4.22it/s][A
Training:  10%|▉         | 17/173 [00:04<00:34,  4.50it/s][A
Training:  12%|█▏        | 21/173 [00:05<00:32,  4.64it/s][A
Training:  14%|█▍        | 25/173 [00:05<00:31,  4.75it/s][A
Training:  17%|█▋        | 29/173 [00:06<00:30,  4.78it/s][A
Training:  19%|█▉        | 33/173 [00:07<00:28,  4.84it/s][A
Training:  21%|██▏       | 37/173 [00:08<00:27,  4.87it/s][A
Training:  24%|██▎       | 41/173 [00:09<00:27,  4.86it/s][A
Training:  26%|██▌       | 45/173 [00:10<00:26,  4.86it/s][A
Training:  28%|██▊       | 49/173 [00:10<00:25,  4.84it/s][A
Training:  31%|███       | 53/173 [00:11<00:24,  4.88it/s][A
Training:  33%|███▎      | 57/173 [

Epoch: 1/41 - Loss: 8.0523 - Accuracy: 0.8758



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:23,  1.12s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.90it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.75it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.26it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.52it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.23it/s][A
Epochs:   2%|▏         | 1/41 [00:40<27:13, 40.85s/it]

Val Loss: 5.8676 - Val Accuracy: 0.9253



Training:   0%|          | 0/173 [00:00<?, ?it/s][A
Training:   1%|          | 1/173 [00:01<03:20,  1.17s/it][A
Training:   3%|▎         | 5/173 [00:01<00:58,  2.87it/s][A
Training:   5%|▌         | 9/173 [00:02<00:43,  3.75it/s][A
Training:   8%|▊         | 13/173 [00:03<00:38,  4.19it/s][A
Training:  10%|▉         | 17/173 [00:04<00:34,  4.48it/s][A
Training:  12%|█▏        | 21/173 [00:05<00:32,  4.61it/s][A
Training:  14%|█▍        | 25/173 [00:06<00:31,  4.72it/s][A
Training:  17%|█▋        | 29/173 [00:06<00:29,  4.85it/s][A
Training:  19%|█▉        | 33/173 [00:07<00:28,  4.90it/s][A
Training:  21%|██▏       | 37/173 [00:08<00:27,  4.93it/s][A
Training:  24%|██▎       | 41/173 [00:09<00:26,  4.93it/s][A
Training:  26%|██▌       | 45/173 [00:10<00:25,  4.93it/s][A
Training:  28%|██▊       | 49/173 [00:10<00:25,  4.90it/s][A
Training:  31%|███       | 53/173 [00:11<00:24,  4.94it/s][A
Training:  33%|███▎      | 57/173 [00:12<00:23,  4.97it/s][A
Training:  35%|███▌

Epoch: 2/41 - Loss: 5.3883 - Accuracy: 0.9356



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:23,  1.13s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.96it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.79it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.27it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.56it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.26it/s][A
Epochs:   5%|▍         | 2/41 [01:21<26:31, 40.80s/it]

Val Loss: 5.2952 - Val Accuracy: 0.9376



Training:   0%|          | 0/173 [00:00<?, ?it/s][A
Training:   1%|          | 1/173 [00:01<03:21,  1.17s/it][A
Training:   3%|▎         | 5/173 [00:02<01:00,  2.79it/s][A
Training:   5%|▌         | 9/173 [00:02<00:45,  3.64it/s][A
Training:   8%|▊         | 13/173 [00:03<00:38,  4.14it/s][A
Training:  10%|▉         | 17/173 [00:04<00:35,  4.43it/s][A
Training:  12%|█▏        | 21/173 [00:05<00:33,  4.57it/s][A
Training:  14%|█▍        | 25/173 [00:06<00:31,  4.69it/s][A
Training:  17%|█▋        | 29/173 [00:06<00:30,  4.75it/s][A
Training:  19%|█▉        | 33/173 [00:07<00:28,  4.83it/s][A
Training:  21%|██▏       | 37/173 [00:08<00:28,  4.83it/s][A
Training:  24%|██▎       | 41/173 [00:09<00:27,  4.88it/s][A
Training:  26%|██▌       | 45/173 [00:10<00:26,  4.91it/s][A
Training:  28%|██▊       | 49/173 [00:10<00:25,  4.89it/s][A
Training:  31%|███       | 53/173 [00:11<00:24,  4.91it/s][A
Training:  33%|███▎      | 57/173 [00:12<00:23,  4.95it/s][A
Training:  35%|███▌

Epoch: 3/41 - Loss: 4.7554 - Accuracy: 0.9410



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:23,  1.12s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.90it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.73it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.14it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.42it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.15it/s][A
Epochs:   7%|▋         | 3/41 [02:02<25:55, 40.94s/it]

Val Loss: 4.7935 - Val Accuracy: 0.9360



Training:   0%|          | 0/173 [00:00<?, ?it/s][A
Training:   1%|          | 1/173 [00:01<03:20,  1.17s/it][A
Training:   3%|▎         | 5/173 [00:01<00:58,  2.86it/s][A
Training:   5%|▌         | 9/173 [00:02<00:44,  3.68it/s][A
Training:   8%|▊         | 13/173 [00:03<00:38,  4.16it/s][A
Training:  10%|▉         | 17/173 [00:04<00:35,  4.39it/s][A
Training:  12%|█▏        | 21/173 [00:05<00:33,  4.49it/s][A
Training:  14%|█▍        | 25/173 [00:06<00:32,  4.60it/s][A
Training:  17%|█▋        | 29/173 [00:06<00:30,  4.68it/s][A
Training:  19%|█▉        | 33/173 [00:07<00:29,  4.73it/s][A
Training:  21%|██▏       | 37/173 [00:08<00:28,  4.78it/s][A
Training:  24%|██▎       | 41/173 [00:09<00:27,  4.83it/s][A
Training:  26%|██▌       | 45/173 [00:10<00:26,  4.83it/s][A
Training:  28%|██▊       | 49/173 [00:11<00:25,  4.83it/s][A
Training:  31%|███       | 53/173 [00:11<00:24,  4.85it/s][A
Training:  33%|███▎      | 57/173 [00:12<00:24,  4.83it/s][A
Training:  35%|███▌

Epoch: 4/41 - Loss: 4.4640 - Accuracy: 0.9436



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:23,  1.11s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.97it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.85it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.33it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.58it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.28it/s][A
Epochs:  10%|▉         | 4/41 [02:43<25:16, 41.00s/it]

Val Loss: 5.2385 - Val Accuracy: 0.9332



Training:   0%|          | 0/173 [00:00<?, ?it/s][A
Training:   1%|          | 1/173 [00:01<03:14,  1.13s/it][A
Training:   3%|▎         | 5/173 [00:01<00:57,  2.91it/s][A
Training:   5%|▌         | 9/173 [00:02<00:43,  3.75it/s][A
Training:   8%|▊         | 13/173 [00:03<00:37,  4.24it/s][A
Training:  10%|▉         | 17/173 [00:04<00:34,  4.51it/s][A
Training:  12%|█▏        | 21/173 [00:05<00:32,  4.64it/s][A
Training:  14%|█▍        | 25/173 [00:06<00:31,  4.70it/s][A
Training:  17%|█▋        | 29/173 [00:06<00:30,  4.79it/s][A
Training:  19%|█▉        | 33/173 [00:07<00:28,  4.87it/s][A
Training:  21%|██▏       | 37/173 [00:08<00:27,  4.90it/s][A
Training:  24%|██▎       | 41/173 [00:09<00:27,  4.87it/s][A
Training:  26%|██▌       | 45/173 [00:10<00:26,  4.90it/s][A
Training:  28%|██▊       | 49/173 [00:10<00:25,  4.88it/s][A
Training:  31%|███       | 53/173 [00:11<00:24,  4.91it/s][A
Training:  33%|███▎      | 57/173 [00:12<00:23,  4.92it/s][A
Training:  35%|███▌

Epoch: 5/41 - Loss: 4.5456 - Accuracy: 0.9456



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:23,  1.11s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.95it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.83it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.29it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.57it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.25it/s][A
Epochs:  12%|█▏        | 5/41 [03:24<24:35, 40.98s/it]

Val Loss: 4.7736 - Val Accuracy: 0.9373



Training:   0%|          | 0/173 [00:00<?, ?it/s][A
Training:   1%|          | 1/173 [00:01<03:11,  1.11s/it][A
Training:   3%|▎         | 5/173 [00:01<00:56,  2.97it/s][A
Training:   5%|▌         | 9/173 [00:02<00:42,  3.82it/s][A
Training:   8%|▊         | 13/173 [00:03<00:37,  4.24it/s][A
Training:  10%|▉         | 17/173 [00:04<00:34,  4.57it/s][A
Training:  12%|█▏        | 21/173 [00:05<00:32,  4.68it/s][A
Training:  14%|█▍        | 25/173 [00:05<00:30,  4.79it/s][A
Training:  17%|█▋        | 29/173 [00:06<00:29,  4.82it/s][A
Training:  19%|█▉        | 33/173 [00:07<00:28,  4.89it/s][A
Training:  21%|██▏       | 37/173 [00:08<00:27,  4.95it/s][A
Training:  24%|██▎       | 41/173 [00:09<00:26,  4.98it/s][A
Training:  26%|██▌       | 45/173 [00:09<00:25,  5.00it/s][A
Training:  28%|██▊       | 49/173 [00:10<00:25,  4.94it/s][A
Training:  31%|███       | 53/173 [00:11<00:24,  4.93it/s][A
Training:  33%|███▎      | 57/173 [00:12<00:23,  4.95it/s][A
Training:  35%|███▌

Epoch: 6/41 - Loss: 4.2329 - Accuracy: 0.9489



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:23,  1.11s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.97it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.84it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.29it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.53it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.25it/s][A
Epochs:  15%|█▍        | 6/41 [04:05<23:51, 40.89s/it]

Val Loss: 5.1126 - Val Accuracy: 0.9376



Training:   0%|          | 0/173 [00:00<?, ?it/s][A
Training:   1%|          | 1/173 [00:01<03:18,  1.15s/it][A
Training:   3%|▎         | 5/173 [00:01<00:58,  2.88it/s][A
Training:   5%|▌         | 9/173 [00:02<00:43,  3.76it/s][A
Training:   8%|▊         | 13/173 [00:03<00:38,  4.19it/s][A
Training:  10%|▉         | 17/173 [00:04<00:35,  4.41it/s][A
Training:  12%|█▏        | 21/173 [00:05<00:33,  4.59it/s][A
Training:  14%|█▍        | 25/173 [00:06<00:31,  4.70it/s][A
Training:  17%|█▋        | 29/173 [00:06<00:30,  4.79it/s][A
Training:  19%|█▉        | 33/173 [00:07<00:28,  4.86it/s][A
Training:  21%|██▏       | 37/173 [00:08<00:28,  4.85it/s][A
Training:  24%|██▎       | 41/173 [00:09<00:26,  4.89it/s][A
Training:  26%|██▌       | 45/173 [00:10<00:25,  4.95it/s][A
Training:  28%|██▊       | 49/173 [00:10<00:25,  4.93it/s][A
Training:  31%|███       | 53/173 [00:11<00:24,  4.94it/s][A
Training:  33%|███▎      | 57/173 [00:12<00:23,  4.94it/s][A
Training:  35%|███▌

Epoch: 7/41 - Loss: 4.3090 - Accuracy: 0.9486



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:23,  1.12s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  3.00it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.87it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.31it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.57it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.29it/s][A
Epochs:  17%|█▋        | 7/41 [04:46<23:11, 40.92s/it]

Val Loss: 5.1887 - Val Accuracy: 0.9426



Training:   0%|          | 0/173 [00:00<?, ?it/s][A
Training:   1%|          | 1/173 [00:01<03:11,  1.11s/it][A
Training:   3%|▎         | 5/173 [00:01<00:58,  2.90it/s][A
Training:   5%|▌         | 9/173 [00:02<00:43,  3.74it/s][A
Training:   8%|▊         | 13/173 [00:03<00:37,  4.22it/s][A
Training:  10%|▉         | 17/173 [00:04<00:34,  4.51it/s][A
Training:  12%|█▏        | 21/173 [00:05<00:32,  4.64it/s][A
Training:  14%|█▍        | 25/173 [00:05<00:31,  4.75it/s][A
Training:  17%|█▋        | 29/173 [00:06<00:29,  4.86it/s][A
Training:  19%|█▉        | 33/173 [00:07<00:28,  4.88it/s][A
Training:  21%|██▏       | 37/173 [00:08<00:27,  4.95it/s][A
Training:  24%|██▎       | 41/173 [00:09<00:26,  4.92it/s][A
Training:  26%|██▌       | 45/173 [00:09<00:25,  4.96it/s][A
Training:  28%|██▊       | 49/173 [00:10<00:25,  4.94it/s][A
Training:  31%|███       | 53/173 [00:11<00:24,  4.94it/s][A
Training:  33%|███▎      | 57/173 [00:12<00:23,  4.94it/s][A
Training:  35%|███▌

Epoch: 8/41 - Loss: 4.2454 - Accuracy: 0.9469



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:23,  1.11s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.99it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.87it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.33it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.58it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.30it/s][A
Epochs:  20%|█▉        | 8/41 [05:27<22:27, 40.83s/it]

Val Loss: 4.9581 - Val Accuracy: 0.9315



Training:   0%|          | 0/173 [00:00<?, ?it/s][A
Training:   1%|          | 1/173 [00:01<03:15,  1.14s/it][A
Training:   3%|▎         | 5/173 [00:01<00:57,  2.94it/s][A
Training:   5%|▌         | 9/173 [00:02<00:42,  3.82it/s][A
Training:   8%|▊         | 13/173 [00:03<00:37,  4.22it/s][A
Training:  10%|▉         | 17/173 [00:04<00:34,  4.47it/s][A
Training:  12%|█▏        | 21/173 [00:05<00:32,  4.65it/s][A
Training:  14%|█▍        | 25/173 [00:05<00:31,  4.70it/s][A
Training:  17%|█▋        | 29/173 [00:06<00:30,  4.74it/s][A
Training:  19%|█▉        | 33/173 [00:07<00:29,  4.82it/s][A
Training:  21%|██▏       | 37/173 [00:08<00:27,  4.87it/s][A
Training:  24%|██▎       | 41/173 [00:09<00:26,  4.89it/s][A
Training:  26%|██▌       | 45/173 [00:10<00:26,  4.91it/s][A
Training:  28%|██▊       | 49/173 [00:10<00:25,  4.86it/s][A
Training:  31%|███       | 53/173 [00:11<00:24,  4.93it/s][A
Training:  33%|███▎      | 57/173 [00:12<00:23,  4.93it/s][A
Training:  35%|███▌

Epoch: 9/41 - Loss: 4.1121 - Accuracy: 0.9490



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:22,  1.09s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.99it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.85it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.29it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.58it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.30it/s][A
Epochs:  22%|██▏       | 9/41 [06:08<21:49, 40.91s/it]

Val Loss: 5.3840 - Val Accuracy: 0.9365



Training:   0%|          | 0/173 [00:00<?, ?it/s][A
Training:   1%|          | 1/173 [00:01<03:15,  1.13s/it][A
Training:   3%|▎         | 5/173 [00:01<00:57,  2.93it/s][A
Training:   5%|▌         | 9/173 [00:02<00:43,  3.76it/s][A
Training:   8%|▊         | 13/173 [00:03<00:38,  4.17it/s][A
Training:  10%|▉         | 17/173 [00:04<00:35,  4.43it/s][A
Training:  12%|█▏        | 21/173 [00:05<00:32,  4.64it/s][A
Training:  14%|█▍        | 25/173 [00:05<00:31,  4.76it/s][A
Training:  17%|█▋        | 29/173 [00:06<00:29,  4.81it/s][A
Training:  19%|█▉        | 33/173 [00:07<00:28,  4.85it/s][A
Training:  21%|██▏       | 37/173 [00:08<00:28,  4.85it/s][A
Training:  24%|██▎       | 41/173 [00:09<00:27,  4.88it/s][A
Training:  26%|██▌       | 45/173 [00:10<00:26,  4.92it/s][A
Training:  28%|██▊       | 49/173 [00:10<00:25,  4.84it/s][A
Training:  31%|███       | 53/173 [00:11<00:24,  4.84it/s][A
Training:  32%|███▏      | 55/173 [00:11<00:21,  5.59it/s][A
Training:  33%|███▎

Epoch: 10/41 - Loss: 4.0475 - Accuracy: 0.9492



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:24,  1.15s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.89it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.69it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.11it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.36it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.11it/s][A
Epochs:  24%|██▍       | 10/41 [06:49<21:16, 41.17s/it]

Val Loss: 5.7284 - Val Accuracy: 0.9426



Training:   0%|          | 0/173 [00:00<?, ?it/s][A
Training:   1%|          | 1/173 [00:01<03:17,  1.15s/it][A
Training:   3%|▎         | 5/173 [00:02<00:59,  2.82it/s][A
Training:   5%|▌         | 9/173 [00:02<00:45,  3.64it/s][A
Training:   8%|▊         | 13/173 [00:03<00:38,  4.12it/s][A
Training:  10%|▉         | 17/173 [00:04<00:35,  4.34it/s][A
Training:  12%|█▏        | 21/173 [00:05<00:34,  4.47it/s][A
Training:  14%|█▍        | 25/173 [00:06<00:32,  4.62it/s][A
Training:  17%|█▋        | 29/173 [00:07<00:30,  4.65it/s][A
Training:  19%|█▉        | 33/173 [00:07<00:30,  4.67it/s][A
Training:  21%|██▏       | 37/173 [00:08<00:28,  4.74it/s][A
Training:  24%|██▎       | 41/173 [00:09<00:27,  4.77it/s][A
Training:  26%|██▌       | 45/173 [00:10<00:26,  4.82it/s][A
Training:  28%|██▊       | 49/173 [00:11<00:25,  4.83it/s][A
Training:  31%|███       | 53/173 [00:11<00:24,  4.86it/s][A
Training:  33%|███▎      | 57/173 [00:12<00:23,  4.88it/s][A
Training:  35%|███▌

Epoch: 11/41 - Loss: 4.4471 - Accuracy: 0.9486



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:24,  1.15s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.90it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.71it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.13it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.39it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.11it/s][A
Epochs:  27%|██▋       | 11/41 [07:31<20:39, 41.32s/it]

Val Loss: 5.0814 - Val Accuracy: 0.9420



Training:   0%|          | 0/173 [00:00<?, ?it/s][A
Training:   1%|          | 1/173 [00:01<03:16,  1.15s/it][A
Training:   3%|▎         | 5/173 [00:01<00:57,  2.94it/s][A
Training:   5%|▌         | 9/173 [00:02<00:43,  3.77it/s][A
Training:   8%|▊         | 13/173 [00:03<00:38,  4.21it/s][A
Training:  10%|▉         | 17/173 [00:04<00:35,  4.40it/s][A
Training:  12%|█▏        | 21/173 [00:05<00:33,  4.55it/s][A
Training:  14%|█▍        | 25/173 [00:06<00:31,  4.65it/s][A
Training:  17%|█▋        | 29/173 [00:06<00:30,  4.70it/s][A
Training:  19%|█▉        | 33/173 [00:07<00:29,  4.75it/s][A
Training:  21%|██▏       | 37/173 [00:08<00:28,  4.82it/s][A
Training:  24%|██▎       | 41/173 [00:09<00:27,  4.84it/s][A
Training:  26%|██▌       | 45/173 [00:10<00:26,  4.88it/s][A
Training:  28%|██▊       | 49/173 [00:10<00:25,  4.86it/s][A
Training:  31%|███       | 53/173 [00:11<00:24,  4.86it/s][A
Training:  33%|███▎      | 57/173 [00:12<00:23,  4.89it/s][A
Training:  35%|███▌

Epoch: 12/41 - Loss: 4.5183 - Accuracy: 0.9510



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:22,  1.09s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.96it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.83it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.27it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.49it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.24it/s][A
Epochs:  29%|██▉       | 12/41 [08:12<19:58, 41.32s/it]

Val Loss: 6.4549 - Val Accuracy: 0.9355



Training:   0%|          | 0/173 [00:00<?, ?it/s][A
Training:   1%|          | 1/173 [00:01<03:11,  1.11s/it][A
Training:   3%|▎         | 5/173 [00:01<00:57,  2.91it/s][A
Training:   5%|▌         | 9/173 [00:02<00:44,  3.71it/s][A
Training:   8%|▊         | 13/173 [00:03<00:38,  4.14it/s][A
Training:  10%|▉         | 17/173 [00:04<00:35,  4.36it/s][A
Training:  12%|█▏        | 21/173 [00:05<00:33,  4.57it/s][A
Training:  14%|█▍        | 25/173 [00:06<00:31,  4.68it/s][A
Training:  17%|█▋        | 29/173 [00:06<00:30,  4.76it/s][A
Training:  19%|█▉        | 33/173 [00:07<00:29,  4.80it/s][A
Training:  21%|██▏       | 37/173 [00:08<00:28,  4.82it/s][A
Training:  23%|██▎       | 40/173 [00:08<00:22,  6.04it/s][A
Training:  24%|██▍       | 42/173 [00:09<00:27,  4.82it/s][A
Training:  26%|██▌       | 45/173 [00:10<00:28,  4.56it/s][A
Training:  28%|██▊       | 49/173 [00:10<00:26,  4.72it/s][A
Training:  31%|███       | 53/173 [00:11<00:25,  4.78it/s][A
Training:  33%|███▎

Epoch: 13/41 - Loss: 4.3540 - Accuracy: 0.9499



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:23,  1.12s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.97it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.85it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.29it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.51it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.23it/s][A
Epochs:  32%|███▏      | 13/41 [08:53<19:14, 41.23s/it]

Val Loss: 5.7570 - Val Accuracy: 0.9354



Training:   0%|          | 0/173 [00:00<?, ?it/s][A
Training:   1%|          | 1/173 [00:01<03:19,  1.16s/it][A
Training:   3%|▎         | 5/173 [00:02<00:59,  2.81it/s][A
Training:   5%|▌         | 9/173 [00:02<00:44,  3.65it/s][A
Training:   8%|▊         | 13/173 [00:03<00:39,  4.06it/s][A
Training:  10%|▉         | 17/173 [00:04<00:35,  4.37it/s][A
Training:  12%|█▏        | 21/173 [00:05<00:33,  4.57it/s][A
Training:  14%|█▍        | 25/173 [00:06<00:31,  4.66it/s][A
Training:  17%|█▋        | 29/173 [00:06<00:29,  4.81it/s][A
Training:  19%|█▉        | 33/173 [00:07<00:28,  4.86it/s][A
Training:  21%|██▏       | 37/173 [00:08<00:27,  4.89it/s][A
Training:  24%|██▎       | 41/173 [00:09<00:26,  4.91it/s][A
Training:  26%|██▌       | 45/173 [00:10<00:25,  4.94it/s][A
Training:  28%|██▊       | 49/173 [00:10<00:25,  4.95it/s][A
Training:  31%|███       | 53/173 [00:11<00:24,  4.98it/s][A
Training:  33%|███▎      | 57/173 [00:12<00:23,  4.87it/s][A
Training:  35%|███▌

Epoch: 14/41 - Loss: 4.5680 - Accuracy: 0.9519



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:23,  1.11s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.89it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.77it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.23it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.50it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.22it/s][A
Epochs:  34%|███▍      | 14/41 [09:34<18:30, 41.12s/it]

Val Loss: 5.8621 - Val Accuracy: 0.9354



Training:   0%|          | 0/173 [00:00<?, ?it/s][A
Training:   1%|          | 1/173 [00:01<03:14,  1.13s/it][A
Training:   3%|▎         | 5/173 [00:01<00:57,  2.91it/s][A
Training:   5%|▌         | 9/173 [00:02<00:43,  3.75it/s][A
Training:   8%|▊         | 13/173 [00:03<00:38,  4.18it/s][A
Training:  10%|▉         | 17/173 [00:04<00:35,  4.44it/s][A
Training:  12%|█▏        | 21/173 [00:05<00:32,  4.62it/s][A
Training:  14%|█▍        | 25/173 [00:05<00:30,  4.79it/s][A
Training:  17%|█▋        | 29/173 [00:06<00:29,  4.83it/s][A
Training:  19%|█▉        | 33/173 [00:07<00:28,  4.90it/s][A
Training:  21%|██▏       | 37/173 [00:08<00:27,  4.96it/s][A
Training:  24%|██▎       | 41/173 [00:09<00:26,  4.96it/s][A
Training:  26%|██▌       | 45/173 [00:09<00:25,  4.97it/s][A
Training:  28%|██▊       | 49/173 [00:10<00:24,  5.00it/s][A
Training:  31%|███       | 53/173 [00:11<00:24,  4.99it/s][A
Training:  33%|███▎      | 57/173 [00:12<00:23,  4.99it/s][A
Training:  35%|███▌

Epoch: 15/41 - Loss: 4.2831 - Accuracy: 0.9509



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:22,  1.07s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  3.02it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.86it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.32it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.54it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.26it/s][A
Epochs:  37%|███▋      | 15/41 [10:15<17:45, 40.99s/it]

Val Loss: 5.7877 - Val Accuracy: 0.9421



Training:   0%|          | 0/173 [00:00<?, ?it/s][A
Training:   1%|          | 1/173 [00:01<03:13,  1.13s/it][A
Training:   3%|▎         | 5/173 [00:01<00:57,  2.92it/s][A
Training:   5%|▌         | 9/173 [00:02<00:43,  3.79it/s][A
Training:   8%|▊         | 13/173 [00:03<00:37,  4.23it/s][A
Training:  10%|▉         | 17/173 [00:04<00:35,  4.45it/s][A
Training:  12%|█▏        | 21/173 [00:05<00:32,  4.63it/s][A
Training:  14%|█▍        | 25/173 [00:05<00:31,  4.75it/s][A
Training:  17%|█▋        | 29/173 [00:06<00:30,  4.77it/s][A
Training:  19%|█▉        | 33/173 [00:07<00:29,  4.80it/s][A
Training:  21%|██▏       | 37/173 [00:08<00:28,  4.85it/s][A
Training:  24%|██▎       | 41/173 [00:09<00:27,  4.86it/s][A
Training:  26%|██▌       | 45/173 [00:10<00:26,  4.87it/s][A
Training:  28%|██▊       | 49/173 [00:10<00:25,  4.93it/s][A
Training:  31%|███       | 53/173 [00:11<00:24,  4.92it/s][A
Training:  33%|███▎      | 57/173 [00:12<00:23,  4.92it/s][A
Training:  35%|███▌

Epoch: 16/41 - Loss: 4.5600 - Accuracy: 0.9537



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:23,  1.13s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.96it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.83it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.26it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.48it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.22it/s][A
Epochs:  39%|███▉      | 16/41 [10:56<17:05, 41.00s/it]

Val Loss: 5.3124 - Val Accuracy: 0.9389



Training:   0%|          | 0/173 [00:00<?, ?it/s][A
Training:   1%|          | 1/173 [00:01<03:17,  1.15s/it][A
Training:   3%|▎         | 5/173 [00:01<00:58,  2.89it/s][A
Training:   5%|▌         | 9/173 [00:02<00:43,  3.77it/s][A
Training:   8%|▊         | 13/173 [00:03<00:38,  4.20it/s][A
Training:  10%|▉         | 17/173 [00:04<00:34,  4.47it/s][A
Training:  12%|█▏        | 21/173 [00:05<00:32,  4.65it/s][A
Training:  14%|█▍        | 25/173 [00:05<00:31,  4.73it/s][A
Training:  17%|█▋        | 29/173 [00:06<00:30,  4.80it/s][A
Training:  18%|█▊        | 32/173 [00:06<00:23,  6.12it/s][A
Training:  20%|█▉        | 34/173 [00:07<00:28,  4.83it/s][A
Training:  21%|██▏       | 37/173 [00:08<00:29,  4.56it/s][A
Training:  23%|██▎       | 39/173 [00:08<00:24,  5.48it/s][A
Training:  24%|██▎       | 41/173 [00:09<00:29,  4.41it/s][A
Training:  25%|██▌       | 44/173 [00:09<00:21,  6.13it/s][A
Training:  27%|██▋       | 46/173 [00:10<00:27,  4.59it/s][A
Training:  28%|██▊ 

Epoch: 17/41 - Loss: 4.2679 - Accuracy: 0.9516



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:23,  1.12s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.94it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.84it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.31it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.51it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.23it/s][A
Epochs:  41%|████▏     | 17/41 [11:37<16:23, 40.99s/it]

Val Loss: 5.7307 - Val Accuracy: 0.9399



Training:   0%|          | 0/173 [00:00<?, ?it/s][A
Training:   1%|          | 1/173 [00:01<03:15,  1.14s/it][A
Training:   3%|▎         | 5/173 [00:01<00:57,  2.92it/s][A
Training:   5%|▌         | 9/173 [00:02<00:43,  3.77it/s][A
Training:   8%|▊         | 13/173 [00:03<00:37,  4.22it/s][A
Training:  10%|▉         | 17/173 [00:04<00:34,  4.47it/s][A
Training:  12%|█▏        | 21/173 [00:05<00:32,  4.62it/s][A
Training:  14%|█▍        | 25/173 [00:06<00:31,  4.65it/s][A
Training:  17%|█▋        | 29/173 [00:06<00:30,  4.73it/s][A
Training:  19%|█▉        | 33/173 [00:07<00:29,  4.75it/s][A
Training:  21%|██▏       | 37/173 [00:08<00:28,  4.80it/s][A
Training:  24%|██▎       | 41/173 [00:09<00:27,  4.83it/s][A
Training:  26%|██▌       | 45/173 [00:10<00:26,  4.80it/s][A
Training:  28%|██▊       | 49/173 [00:10<00:25,  4.82it/s][A
Training:  31%|███       | 53/173 [00:11<00:24,  4.84it/s][A
Training:  33%|███▎      | 57/173 [00:12<00:23,  4.87it/s][A
Training:  35%|███▌

Epoch: 18/41 - Loss: 4.5309 - Accuracy: 0.9556



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:23,  1.13s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.90it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.78it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.22it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.46it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.18it/s][A
Epochs:  44%|████▍     | 18/41 [12:18<15:44, 41.07s/it]

Val Loss: 5.5565 - Val Accuracy: 0.9409



Training:   0%|          | 0/173 [00:00<?, ?it/s][A
Training:   1%|          | 1/173 [00:01<03:19,  1.16s/it][A
Training:   3%|▎         | 5/173 [00:01<00:58,  2.86it/s][A
Training:   5%|▌         | 9/173 [00:02<00:44,  3.70it/s][A
Training:   8%|▊         | 13/173 [00:03<00:39,  4.09it/s][A
Training:  10%|▉         | 17/173 [00:04<00:35,  4.42it/s][A
Training:  12%|█▏        | 21/173 [00:05<00:33,  4.59it/s][A
Training:  14%|█▍        | 25/173 [00:06<00:31,  4.70it/s][A
Training:  17%|█▋        | 29/173 [00:06<00:30,  4.76it/s][A
Training:  19%|█▉        | 33/173 [00:07<00:29,  4.82it/s][A
Training:  21%|██▏       | 37/173 [00:08<00:28,  4.82it/s][A
Training:  24%|██▎       | 41/173 [00:09<00:27,  4.86it/s][A
Training:  26%|██▌       | 45/173 [00:10<00:26,  4.82it/s][A
Training:  28%|██▊       | 49/173 [00:10<00:25,  4.89it/s][A
Training:  31%|███       | 53/173 [00:11<00:24,  4.89it/s][A
Training:  33%|███▎      | 57/173 [00:12<00:23,  4.91it/s][A
Training:  35%|███▌

Epoch: 19/41 - Loss: 4.5089 - Accuracy: 0.9552



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:23,  1.13s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.86it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.69it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.14it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.46it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.17it/s][A
Epochs:  46%|████▋     | 19/41 [13:00<15:05, 41.15s/it]

Val Loss: 5.4696 - Val Accuracy: 0.9359



Training:   0%|          | 0/173 [00:00<?, ?it/s][A
Training:   1%|          | 1/173 [00:01<03:08,  1.10s/it][A
Training:   3%|▎         | 5/173 [00:01<00:56,  2.95it/s][A
Training:   5%|▌         | 9/173 [00:02<00:43,  3.79it/s][A
Training:   8%|▊         | 13/173 [00:03<00:38,  4.17it/s][A
Training:  10%|▉         | 17/173 [00:04<00:35,  4.42it/s][A
Training:  12%|█▏        | 21/173 [00:05<00:32,  4.62it/s][A
Training:  14%|█▍        | 25/173 [00:05<00:31,  4.72it/s][A
Training:  17%|█▋        | 29/173 [00:06<00:30,  4.77it/s][A
Training:  19%|█▉        | 33/173 [00:07<00:29,  4.79it/s][A
Training:  21%|██▏       | 37/173 [00:08<00:28,  4.83it/s][A
Training:  24%|██▎       | 41/173 [00:09<00:27,  4.84it/s][A
Training:  26%|██▌       | 45/173 [00:10<00:26,  4.87it/s][A
Training:  28%|██▊       | 49/173 [00:10<00:25,  4.86it/s][A
Training:  31%|███       | 53/173 [00:11<00:24,  4.86it/s][A
Training:  33%|███▎      | 57/173 [00:12<00:23,  4.85it/s][A
Training:  35%|███▌

Epoch: 20/41 - Loss: 4.4952 - Accuracy: 0.9513



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:23,  1.10s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.97it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.81it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.24it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.53it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.26it/s][A
Epochs:  49%|████▉     | 20/41 [13:41<14:24, 41.17s/it]

Val Loss: 5.4632 - Val Accuracy: 0.9365



Training:   0%|          | 0/173 [00:00<?, ?it/s][A
Training:   1%|          | 1/173 [00:01<03:13,  1.13s/it][A
Training:   3%|▎         | 5/173 [00:01<00:57,  2.93it/s][A
Training:   5%|▌         | 9/173 [00:02<00:43,  3.78it/s][A
Training:   8%|▊         | 13/173 [00:03<00:38,  4.18it/s][A
Training:  10%|▉         | 17/173 [00:04<00:35,  4.44it/s][A
Training:  12%|█▏        | 21/173 [00:05<00:32,  4.62it/s][A
Training:  14%|█▍        | 25/173 [00:06<00:31,  4.72it/s][A
Training:  17%|█▋        | 29/173 [00:06<00:29,  4.80it/s][A
Training:  19%|█▉        | 33/173 [00:07<00:28,  4.85it/s][A
Training:  21%|██▏       | 37/173 [00:08<00:28,  4.85it/s][A
Training:  24%|██▎       | 41/173 [00:09<00:27,  4.87it/s][A
Training:  26%|██▌       | 45/173 [00:10<00:26,  4.92it/s][A
Training:  28%|██▊       | 49/173 [00:10<00:24,  4.96it/s][A
Training:  31%|███       | 53/173 [00:11<00:24,  4.85it/s][A
Training:  32%|███▏      | 56/173 [00:11<00:19,  6.10it/s][A
Training:  34%|███▎

Epoch: 21/41 - Loss: 4.4312 - Accuracy: 0.9562



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:23,  1.11s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  3.00it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.87it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.25it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.53it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.25it/s][A
Epochs:  51%|█████     | 21/41 [14:22<13:41, 41.06s/it]

Val Loss: 5.3679 - Val Accuracy: 0.9398



Training:   0%|          | 0/173 [00:00<?, ?it/s][A
Training:   1%|          | 1/173 [00:01<03:11,  1.11s/it][A
Training:   3%|▎         | 5/173 [00:01<00:56,  2.96it/s][A
Training:   5%|▌         | 9/173 [00:02<00:42,  3.83it/s][A
Training:   8%|▊         | 13/173 [00:03<00:38,  4.20it/s][A
Training:  10%|▉         | 17/173 [00:04<00:34,  4.46it/s][A
Training:  12%|█▏        | 21/173 [00:05<00:33,  4.58it/s][A
Training:  14%|█▍        | 25/173 [00:06<00:31,  4.67it/s][A
Training:  17%|█▋        | 29/173 [00:06<00:30,  4.76it/s][A
Training:  19%|█▉        | 33/173 [00:07<00:28,  4.84it/s][A
Training:  21%|██▏       | 37/173 [00:08<00:28,  4.84it/s][A
Training:  24%|██▎       | 41/173 [00:09<00:26,  4.90it/s][A
Training:  26%|██▌       | 45/173 [00:10<00:25,  4.96it/s][A
Training:  28%|██▊       | 49/173 [00:10<00:25,  4.92it/s][A
Training:  31%|███       | 53/173 [00:11<00:24,  4.89it/s][A
Training:  33%|███▎      | 57/173 [00:12<00:23,  4.87it/s][A
Training:  35%|███▌

Epoch: 22/41 - Loss: 4.5218 - Accuracy: 0.9541



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:24,  1.17s/it][A
 23%|██▎       | 5/22 [00:02<00:05,  2.84it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.66it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.11it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.35it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.10it/s][A
Epochs:  54%|█████▎    | 22/41 [15:03<13:04, 41.29s/it]

Val Loss: 6.0006 - Val Accuracy: 0.9404



Training:   0%|          | 0/173 [00:00<?, ?it/s][A
Training:   1%|          | 1/173 [00:01<03:21,  1.17s/it][A
Training:   3%|▎         | 5/173 [00:02<01:00,  2.79it/s][A
Training:   5%|▌         | 9/173 [00:02<00:44,  3.69it/s][A
Training:   8%|▊         | 13/173 [00:03<00:38,  4.15it/s][A
Training:  10%|▉         | 17/173 [00:04<00:35,  4.40it/s][A
Training:  12%|█▏        | 21/173 [00:05<00:33,  4.53it/s][A
Training:  14%|█▍        | 25/173 [00:06<00:31,  4.67it/s][A
Training:  17%|█▋        | 29/173 [00:06<00:30,  4.74it/s][A
Training:  19%|█▉        | 33/173 [00:07<00:29,  4.80it/s][A
Training:  21%|██▏       | 37/173 [00:08<00:28,  4.83it/s][A
Training:  24%|██▎       | 41/173 [00:09<00:27,  4.86it/s][A
Training:  26%|██▌       | 45/173 [00:10<00:26,  4.85it/s][A
Training:  28%|██▊       | 49/173 [00:11<00:25,  4.88it/s][A
Training:  31%|███       | 53/173 [00:11<00:24,  4.89it/s][A
Training:  33%|███▎      | 57/173 [00:12<00:23,  4.87it/s][A
Training:  35%|███▌

Epoch: 23/41 - Loss: 4.5474 - Accuracy: 0.9540



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:24,  1.15s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.90it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.71it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.18it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.41it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.17it/s][A
Epochs:  56%|█████▌    | 23/41 [15:45<12:23, 41.30s/it]

Val Loss: 6.0469 - Val Accuracy: 0.9404



Training:   0%|          | 0/173 [00:00<?, ?it/s][A
Training:   1%|          | 1/173 [00:01<03:16,  1.14s/it][A
Training:   3%|▎         | 5/173 [00:01<00:58,  2.88it/s][A
Training:   5%|▌         | 9/173 [00:02<00:43,  3.73it/s][A
Training:   8%|▊         | 13/173 [00:03<00:38,  4.16it/s][A
Training:  10%|▉         | 17/173 [00:04<00:35,  4.42it/s][A
Training:  12%|█▏        | 21/173 [00:05<00:33,  4.59it/s][A
Training:  14%|█▍        | 25/173 [00:06<00:31,  4.66it/s][A
Training:  17%|█▋        | 29/173 [00:06<00:30,  4.72it/s][A
Training:  19%|█▉        | 33/173 [00:07<00:29,  4.78it/s][A
Training:  21%|██▏       | 37/173 [00:08<00:28,  4.82it/s][A
Training:  24%|██▎       | 41/173 [00:09<00:27,  4.85it/s][A
Training:  26%|██▌       | 45/173 [00:10<00:26,  4.88it/s][A
Training:  28%|██▊       | 49/173 [00:10<00:25,  4.90it/s][A
Training:  31%|███       | 53/173 [00:11<00:24,  4.91it/s][A
Training:  33%|███▎      | 57/173 [00:12<00:23,  4.91it/s][A
Training:  35%|███▌

Epoch: 24/41 - Loss: 4.4251 - Accuracy: 0.9546



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:24,  1.16s/it][A
 23%|██▎       | 5/22 [00:02<00:05,  2.84it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.65it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.09it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.35it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.07it/s][A
Epochs:  59%|█████▊    | 24/41 [16:26<11:43, 41.36s/it]

Val Loss: 6.2264 - Val Accuracy: 0.9416



Training:   0%|          | 0/173 [00:00<?, ?it/s][A
Training:   1%|          | 1/173 [00:01<03:19,  1.16s/it][A
Training:   3%|▎         | 5/173 [00:01<00:58,  2.86it/s][A
Training:   5%|▌         | 9/173 [00:02<00:44,  3.65it/s][A
Training:   8%|▊         | 13/173 [00:03<00:39,  4.09it/s][A
Training:  10%|▉         | 17/173 [00:04<00:35,  4.37it/s][A
Training:  12%|█▏        | 21/173 [00:05<00:33,  4.57it/s][A
Training:  14%|█▍        | 25/173 [00:06<00:31,  4.64it/s][A
Training:  17%|█▋        | 29/173 [00:06<00:30,  4.71it/s][A
Training:  19%|█▉        | 33/173 [00:07<00:29,  4.77it/s][A
Training:  21%|██▏       | 37/173 [00:08<00:28,  4.80it/s][A
Training:  24%|██▎       | 41/173 [00:09<00:27,  4.81it/s][A
Training:  26%|██▌       | 45/173 [00:10<00:26,  4.81it/s][A
Training:  28%|██▊       | 49/173 [00:11<00:25,  4.82it/s][A
Training:  31%|███       | 53/173 [00:11<00:25,  4.78it/s][A
Training:  33%|███▎      | 57/173 [00:12<00:24,  4.77it/s][A
Training:  35%|███▌

Epoch: 25/41 - Loss: 4.3076 - Accuracy: 0.9555



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:24,  1.18s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.87it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.73it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.17it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.39it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.12it/s][A
Epochs:  61%|██████    | 25/41 [17:08<11:03, 41.48s/it]

Val Loss: 6.3200 - Val Accuracy: 0.9370



Training:   0%|          | 0/173 [00:00<?, ?it/s][A
Training:   1%|          | 1/173 [00:01<03:21,  1.17s/it][A
Training:   3%|▎         | 5/173 [00:01<00:58,  2.87it/s][A
Training:   5%|▌         | 9/173 [00:02<00:44,  3.68it/s][A
Training:   8%|▊         | 13/173 [00:03<00:38,  4.12it/s][A
Training:  10%|▉         | 17/173 [00:04<00:35,  4.34it/s][A
Training:  12%|█▏        | 21/173 [00:05<00:33,  4.53it/s][A
Training:  14%|█▍        | 25/173 [00:06<00:32,  4.61it/s][A
Training:  17%|█▋        | 29/173 [00:06<00:30,  4.68it/s][A
Training:  19%|█▉        | 33/173 [00:07<00:29,  4.74it/s][A
Training:  21%|██▏       | 37/173 [00:08<00:28,  4.77it/s][A
Training:  24%|██▎       | 41/173 [00:09<00:27,  4.80it/s][A
Training:  26%|██▌       | 45/173 [00:10<00:26,  4.85it/s][A
Training:  28%|██▊       | 49/173 [00:11<00:25,  4.82it/s][A
Training:  31%|███       | 53/173 [00:11<00:24,  4.85it/s][A
Training:  33%|███▎      | 57/173 [00:12<00:23,  4.89it/s][A
Training:  35%|███▌

Epoch: 26/41 - Loss: 4.7486 - Accuracy: 0.9548



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:23,  1.10s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.92it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.79it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.24it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.51it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.22it/s][A
Epochs:  63%|██████▎   | 26/41 [17:49<10:21, 41.42s/it]

Val Loss: 6.9165 - Val Accuracy: 0.9249



Training:   0%|          | 0/173 [00:00<?, ?it/s][A
Training:   1%|          | 1/173 [00:01<03:12,  1.12s/it][A
Training:   3%|▎         | 5/173 [00:01<00:57,  2.92it/s][A
Training:   5%|▌         | 9/173 [00:02<00:43,  3.77it/s][A
Training:   8%|▊         | 13/173 [00:03<00:38,  4.18it/s][A
Training:  10%|▉         | 17/173 [00:04<00:34,  4.47it/s][A
Training:  12%|█▏        | 21/173 [00:05<00:33,  4.60it/s][A
Training:  14%|█▍        | 25/173 [00:06<00:31,  4.71it/s][A
Training:  17%|█▋        | 29/173 [00:06<00:30,  4.80it/s][A
Training:  19%|█▉        | 33/173 [00:07<00:28,  4.83it/s][A
Training:  21%|██▏       | 37/173 [00:08<00:27,  4.90it/s][A
Training:  24%|██▎       | 41/173 [00:09<00:26,  4.89it/s][A
Training:  26%|██▌       | 45/173 [00:10<00:26,  4.89it/s][A
Training:  28%|██▊       | 49/173 [00:10<00:25,  4.86it/s][A
Training:  31%|███       | 53/173 [00:11<00:24,  4.85it/s][A
Training:  33%|███▎      | 57/173 [00:12<00:23,  4.87it/s][A
Training:  35%|███▌

Epoch: 27/41 - Loss: 4.5865 - Accuracy: 0.9538



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:22,  1.09s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.96it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.82it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.27it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.53it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.24it/s][A
Epochs:  66%|██████▌   | 27/41 [18:31<09:39, 41.42s/it]

Val Loss: 6.6589 - Val Accuracy: 0.9422



Training:   0%|          | 0/173 [00:00<?, ?it/s][A
Training:   1%|          | 1/173 [00:01<03:16,  1.14s/it][A
Training:   3%|▎         | 5/173 [00:01<00:57,  2.93it/s][A
Training:   5%|▌         | 9/173 [00:02<00:43,  3.80it/s][A
Training:   8%|▊         | 13/173 [00:03<00:38,  4.18it/s][A
Training:  10%|▉         | 17/173 [00:04<00:34,  4.47it/s][A
Training:  12%|█▏        | 21/173 [00:05<00:32,  4.61it/s][A
Training:  14%|█▍        | 25/173 [00:05<00:31,  4.73it/s][A
Training:  17%|█▋        | 29/173 [00:06<00:30,  4.80it/s][A
Training:  19%|█▉        | 33/173 [00:07<00:29,  4.82it/s][A
Training:  21%|██▏       | 37/173 [00:08<00:28,  4.85it/s][A
Training:  24%|██▎       | 41/173 [00:09<00:27,  4.88it/s][A
Training:  26%|██▌       | 45/173 [00:10<00:26,  4.82it/s][A
Training:  28%|██▊       | 49/173 [00:10<00:25,  4.87it/s][A
Training:  31%|███       | 53/173 [00:11<00:24,  4.94it/s][A
Training:  33%|███▎      | 57/173 [00:12<00:23,  4.96it/s][A
Training:  35%|███▌

Epoch: 28/41 - Loss: 4.6754 - Accuracy: 0.9550



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:23,  1.13s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.91it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.73it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.21it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.48it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.20it/s][A
Epochs:  68%|██████▊   | 28/41 [19:12<08:57, 41.37s/it]

Val Loss: 5.4268 - Val Accuracy: 0.9438



Training:   0%|          | 0/173 [00:00<?, ?it/s][A
Training:   1%|          | 1/173 [00:01<03:08,  1.09s/it][A
Training:   3%|▎         | 5/173 [00:01<00:56,  2.98it/s][A
Training:   5%|▌         | 9/173 [00:02<00:43,  3.80it/s][A
Training:   8%|▊         | 13/173 [00:03<00:37,  4.27it/s][A
Training:  10%|▉         | 17/173 [00:04<00:34,  4.46it/s][A
Training:  12%|█▏        | 21/173 [00:05<00:32,  4.63it/s][A
Training:  14%|█▍        | 25/173 [00:05<00:31,  4.77it/s][A
Training:  17%|█▋        | 29/173 [00:06<00:29,  4.84it/s][A
Training:  19%|█▉        | 33/173 [00:07<00:28,  4.87it/s][A
Training:  21%|██▏       | 37/173 [00:08<00:27,  4.89it/s][A
Training:  24%|██▎       | 41/173 [00:09<00:26,  4.90it/s][A
Training:  26%|██▌       | 45/173 [00:09<00:26,  4.92it/s][A
Training:  28%|██▊       | 49/173 [00:10<00:25,  4.91it/s][A
Training:  31%|███       | 53/173 [00:11<00:24,  4.91it/s][A
Training:  33%|███▎      | 57/173 [00:12<00:23,  4.91it/s][A
Training:  35%|███▌

Epoch: 29/41 - Loss: 4.6108 - Accuracy: 0.9575



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:23,  1.11s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.98it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.82it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.24it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.53it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.24it/s][A
Epochs:  71%|███████   | 29/41 [19:53<08:14, 41.23s/it]

Val Loss: 6.4752 - Val Accuracy: 0.9460



Training:   0%|          | 0/173 [00:00<?, ?it/s][A
Training:   1%|          | 1/173 [00:01<03:10,  1.11s/it][A
Training:   3%|▎         | 5/173 [00:01<00:56,  2.96it/s][A
Training:   5%|▌         | 9/173 [00:02<00:42,  3.82it/s][A
Training:   8%|▊         | 13/173 [00:03<00:37,  4.24it/s][A
Training:  10%|▉         | 17/173 [00:04<00:34,  4.51it/s][A
Training:  12%|█▏        | 21/173 [00:05<00:32,  4.64it/s][A
Training:  14%|█▍        | 25/173 [00:05<00:31,  4.77it/s][A
Training:  17%|█▋        | 29/173 [00:06<00:29,  4.81it/s][A
Training:  19%|█▉        | 33/173 [00:07<00:28,  4.83it/s][A
Training:  21%|██▏       | 37/173 [00:08<00:28,  4.86it/s][A
Training:  23%|██▎       | 40/173 [00:08<00:21,  6.14it/s][A
Training:  24%|██▍       | 42/173 [00:09<00:26,  4.85it/s][A
Training:  26%|██▌       | 45/173 [00:10<00:28,  4.47it/s][A
Training:  28%|██▊       | 49/173 [00:10<00:27,  4.57it/s][A
Training:  31%|███       | 53/173 [00:11<00:25,  4.63it/s][A
Training:  33%|███▎

Epoch: 30/41 - Loss: 4.6933 - Accuracy: 0.9553



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:23,  1.11s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.95it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.80it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.26it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.45it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.19it/s][A
Epochs:  73%|███████▎  | 30/41 [20:34<07:33, 41.20s/it]

Val Loss: 5.8853 - Val Accuracy: 0.9460



Training:   0%|          | 0/173 [00:00<?, ?it/s][A
Training:   1%|          | 1/173 [00:01<03:09,  1.10s/it][A
Training:   3%|▎         | 5/173 [00:01<00:56,  2.98it/s][A
Training:   5%|▌         | 9/173 [00:02<00:44,  3.71it/s][A
Training:   8%|▊         | 13/173 [00:03<00:38,  4.13it/s][A
Training:  10%|▉         | 17/173 [00:04<00:35,  4.40it/s][A
Training:  12%|█▏        | 21/173 [00:05<00:33,  4.58it/s][A
Training:  14%|█▍        | 25/173 [00:06<00:31,  4.69it/s][A
Training:  17%|█▋        | 29/173 [00:06<00:30,  4.78it/s][A
Training:  19%|█▉        | 33/173 [00:07<00:29,  4.81it/s][A
Training:  21%|██▏       | 37/173 [00:08<00:27,  4.88it/s][A
Training:  24%|██▎       | 41/173 [00:09<00:26,  4.93it/s][A
Training:  26%|██▌       | 45/173 [00:10<00:26,  4.92it/s][A
Training:  28%|██▊       | 49/173 [00:10<00:25,  4.96it/s][A
Training:  31%|███       | 53/173 [00:11<00:24,  5.00it/s][A
Training:  33%|███▎      | 57/173 [00:12<00:23,  4.98it/s][A
Training:  35%|███▌

Epoch: 31/41 - Loss: 4.7326 - Accuracy: 0.9540



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:22,  1.08s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  3.02it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.84it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.28it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.53it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.25it/s][A
Epochs:  76%|███████▌  | 31/41 [21:15<06:51, 41.12s/it]

Val Loss: 5.9605 - Val Accuracy: 0.9483



Training:   0%|          | 0/173 [00:00<?, ?it/s][A
Training:   1%|          | 1/173 [00:01<03:11,  1.11s/it][A
Training:   3%|▎         | 5/173 [00:01<00:56,  2.96it/s][A
Training:   5%|▌         | 9/173 [00:02<00:43,  3.79it/s][A
Training:   8%|▊         | 13/173 [00:03<00:37,  4.22it/s][A
Training:  10%|▉         | 17/173 [00:04<00:34,  4.46it/s][A
Training:  12%|█▏        | 21/173 [00:05<00:32,  4.62it/s][A
Training:  14%|█▍        | 25/173 [00:05<00:31,  4.75it/s][A
Training:  17%|█▋        | 29/173 [00:06<00:30,  4.74it/s][A
Training:  19%|█▉        | 33/173 [00:07<00:28,  4.84it/s][A
Training:  21%|██▏       | 37/173 [00:08<00:27,  4.90it/s][A
Training:  24%|██▎       | 41/173 [00:09<00:26,  4.89it/s][A
Training:  26%|██▌       | 45/173 [00:10<00:26,  4.92it/s][A
Training:  28%|██▊       | 49/173 [00:10<00:24,  4.96it/s][A
Training:  31%|███       | 53/173 [00:11<00:24,  4.95it/s][A
Training:  33%|███▎      | 57/173 [00:12<00:23,  4.98it/s][A
Training:  35%|███▌

Epoch: 32/41 - Loss: 4.5707 - Accuracy: 0.9548



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:22,  1.10s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.99it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.82it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.22it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.50it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.22it/s][A
Epochs:  78%|███████▊  | 32/41 [21:56<06:09, 41.06s/it]

Val Loss: 6.0816 - Val Accuracy: 0.9432



Training:   0%|          | 0/173 [00:00<?, ?it/s][A
Training:   1%|          | 1/173 [00:01<03:17,  1.15s/it][A
Training:   3%|▎         | 5/173 [00:01<00:58,  2.89it/s][A
Training:   5%|▌         | 9/173 [00:02<00:43,  3.75it/s][A
Training:   8%|▊         | 13/173 [00:03<00:38,  4.17it/s][A
Training:  10%|▉         | 17/173 [00:04<00:35,  4.45it/s][A
Training:  12%|█▏        | 21/173 [00:05<00:33,  4.56it/s][A
Training:  14%|█▍        | 25/173 [00:06<00:31,  4.68it/s][A
Training:  17%|█▋        | 29/173 [00:06<00:29,  4.81it/s][A
Training:  19%|█▉        | 33/173 [00:07<00:28,  4.85it/s][A
Training:  21%|██▏       | 37/173 [00:08<00:27,  4.90it/s][A
Training:  24%|██▎       | 41/173 [00:09<00:26,  4.91it/s][A
Training:  26%|██▌       | 45/173 [00:10<00:26,  4.89it/s][A
Training:  28%|██▊       | 49/173 [00:10<00:25,  4.84it/s][A
Training:  31%|███       | 53/173 [00:11<00:24,  4.86it/s][A
Training:  33%|███▎      | 57/173 [00:12<00:23,  4.85it/s][A
Training:  35%|███▌

Epoch: 33/41 - Loss: 4.4625 - Accuracy: 0.9556



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:23,  1.10s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.99it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.76it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.25it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.48it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.21it/s][A
Epochs:  80%|████████  | 33/41 [22:37<05:29, 41.13s/it]

Val Loss: 5.9919 - Val Accuracy: 0.9439



Training:   0%|          | 0/173 [00:00<?, ?it/s][A
Training:   1%|          | 1/173 [00:01<03:09,  1.10s/it][A
Training:   3%|▎         | 5/173 [00:01<00:57,  2.94it/s][A
Training:   5%|▌         | 9/173 [00:02<00:43,  3.75it/s][A
Training:   8%|▊         | 13/173 [00:03<00:38,  4.19it/s][A
Training:  10%|▉         | 17/173 [00:04<00:35,  4.43it/s][A
Training:  12%|█▏        | 21/173 [00:05<00:33,  4.60it/s][A
Training:  14%|█▍        | 25/173 [00:06<00:31,  4.72it/s][A
Training:  17%|█▋        | 29/173 [00:06<00:30,  4.79it/s][A
Training:  19%|█▉        | 33/173 [00:07<00:28,  4.85it/s][A
Training:  21%|██▏       | 37/173 [00:08<00:27,  4.88it/s][A
Training:  24%|██▎       | 41/173 [00:09<00:27,  4.85it/s][A
Training:  26%|██▌       | 45/173 [00:10<00:26,  4.86it/s][A
Training:  28%|██▊       | 49/173 [00:10<00:25,  4.89it/s][A
Training:  31%|███       | 53/173 [00:11<00:24,  4.90it/s][A
Training:  33%|███▎      | 57/173 [00:12<00:23,  4.91it/s][A
Training:  35%|███▌

Epoch: 34/41 - Loss: 4.5708 - Accuracy: 0.9572



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:23,  1.11s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.97it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.83it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.30it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.54it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.26it/s][A
Epochs:  83%|████████▎ | 34/41 [23:18<04:47, 41.07s/it]

Val Loss: 6.3521 - Val Accuracy: 0.9432



Training:   0%|          | 0/173 [00:00<?, ?it/s][A
Training:   1%|          | 1/173 [00:01<03:10,  1.11s/it][A
Training:   3%|▎         | 5/173 [00:01<00:58,  2.88it/s][A
Training:   5%|▌         | 9/173 [00:02<00:43,  3.73it/s][A
Training:   8%|▊         | 13/173 [00:03<00:38,  4.18it/s][A
Training:  10%|▉         | 17/173 [00:04<00:35,  4.45it/s][A
Training:  12%|█▏        | 21/173 [00:05<00:32,  4.64it/s][A
Training:  14%|█▍        | 25/173 [00:06<00:31,  4.73it/s][A
Training:  17%|█▋        | 29/173 [00:06<00:30,  4.80it/s][A
Training:  19%|█▉        | 33/173 [00:07<00:29,  4.83it/s][A
Training:  21%|██▏       | 37/173 [00:08<00:28,  4.84it/s][A
Training:  24%|██▎       | 41/173 [00:09<00:27,  4.80it/s][A
Training:  26%|██▌       | 45/173 [00:10<00:26,  4.82it/s][A
Training:  28%|██▊       | 49/173 [00:10<00:25,  4.82it/s][A
Training:  31%|███       | 53/173 [00:11<00:24,  4.84it/s][A
Training:  33%|███▎      | 57/173 [00:12<00:23,  4.84it/s][A
Training:  35%|███▌

Epoch: 35/41 - Loss: 4.7756 - Accuracy: 0.9580



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:23,  1.13s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.94it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.82it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.24it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.48it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.21it/s][A
Epochs:  85%|████████▌ | 35/41 [23:59<04:06, 41.13s/it]

Val Loss: 6.3929 - Val Accuracy: 0.9325



Training:   0%|          | 0/173 [00:00<?, ?it/s][A
Training:   1%|          | 1/173 [00:01<03:23,  1.18s/it][A
Training:   3%|▎         | 5/173 [00:02<01:00,  2.78it/s][A
Training:   5%|▌         | 9/173 [00:02<00:45,  3.59it/s][A
Training:   8%|▊         | 13/173 [00:03<00:39,  4.06it/s][A
Training:  10%|▉         | 17/173 [00:04<00:36,  4.32it/s][A
Training:  12%|█▏        | 21/173 [00:05<00:33,  4.51it/s][A
Training:  14%|█▍        | 25/173 [00:06<00:31,  4.63it/s][A
Training:  17%|█▋        | 29/173 [00:06<00:30,  4.71it/s][A
Training:  19%|█▉        | 33/173 [00:07<00:29,  4.78it/s][A
Training:  21%|██▏       | 37/173 [00:08<00:28,  4.83it/s][A
Training:  24%|██▎       | 41/173 [00:09<00:27,  4.78it/s][A
Training:  26%|██▌       | 45/173 [00:10<00:26,  4.86it/s][A
Training:  28%|██▊       | 49/173 [00:11<00:25,  4.93it/s][A
Training:  31%|███       | 53/173 [00:11<00:24,  4.92it/s][A
Training:  33%|███▎      | 57/173 [00:12<00:23,  4.95it/s][A
Training:  35%|███▌

Epoch: 36/41 - Loss: 4.6536 - Accuracy: 0.9583



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:22,  1.09s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  3.01it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.90it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.35it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.60it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.31it/s][A
Epochs:  88%|████████▊ | 36/41 [24:40<03:25, 41.06s/it]

Val Loss: 6.7196 - Val Accuracy: 0.9449



Training:   0%|          | 0/173 [00:00<?, ?it/s][A
Training:   1%|          | 1/173 [00:01<03:08,  1.10s/it][A
Training:   3%|▎         | 5/173 [00:01<00:56,  2.95it/s][A
Training:   5%|▌         | 9/173 [00:02<00:43,  3.80it/s][A
Training:   8%|▊         | 13/173 [00:03<00:38,  4.20it/s][A
Training:  10%|▉         | 17/173 [00:04<00:34,  4.47it/s][A
Training:  12%|█▏        | 21/173 [00:05<00:32,  4.65it/s][A
Training:  14%|█▍        | 25/173 [00:05<00:31,  4.76it/s][A
Training:  17%|█▋        | 29/173 [00:06<00:30,  4.78it/s][A
Training:  19%|█▉        | 33/173 [00:07<00:28,  4.86it/s][A
Training:  21%|██▏       | 37/173 [00:08<00:27,  4.90it/s][A
Training:  24%|██▎       | 41/173 [00:09<00:27,  4.89it/s][A
Training:  26%|██▌       | 45/173 [00:10<00:26,  4.83it/s][A
Training:  28%|██▊       | 49/173 [00:10<00:25,  4.88it/s][A
Training:  31%|███       | 53/173 [00:11<00:24,  4.89it/s][A
Training:  33%|███▎      | 57/173 [00:12<00:23,  4.90it/s][A
Training:  35%|███▌

Epoch: 37/41 - Loss: 4.6725 - Accuracy: 0.9584



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:23,  1.11s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.99it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.86it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.30it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.57it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.27it/s][A
Epochs:  90%|█████████ | 37/41 [25:21<02:43, 41.00s/it]

Val Loss: 6.0278 - Val Accuracy: 0.9460



Training:   0%|          | 0/173 [00:00<?, ?it/s][A
Training:   1%|          | 1/173 [00:01<03:14,  1.13s/it][A
Training:   3%|▎         | 5/173 [00:01<00:57,  2.94it/s][A
Training:   5%|▌         | 9/173 [00:02<00:43,  3.78it/s][A
Training:   8%|▊         | 13/173 [00:03<00:37,  4.24it/s][A
Training:  10%|▉         | 17/173 [00:04<00:34,  4.47it/s][A
Training:  12%|█▏        | 21/173 [00:05<00:32,  4.65it/s][A
Training:  14%|█▍        | 25/173 [00:05<00:31,  4.77it/s][A
Training:  17%|█▋        | 29/173 [00:06<00:29,  4.83it/s][A
Training:  19%|█▉        | 33/173 [00:07<00:28,  4.89it/s][A
Training:  21%|██▏       | 37/173 [00:08<00:27,  4.93it/s][A
Training:  24%|██▎       | 41/173 [00:09<00:26,  4.94it/s][A
Training:  26%|██▌       | 45/173 [00:09<00:26,  4.92it/s][A
Training:  28%|██▊       | 49/173 [00:10<00:25,  4.92it/s][A
Training:  31%|███       | 53/173 [00:11<00:24,  4.85it/s][A
Training:  33%|███▎      | 57/173 [00:12<00:23,  4.91it/s][A
Training:  35%|███▌

Epoch: 38/41 - Loss: 4.5349 - Accuracy: 0.9564



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:23,  1.14s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.92it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.78it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.25it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.50it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.20it/s][A
Epochs:  93%|█████████▎| 38/41 [26:02<02:03, 41.04s/it]

Val Loss: 6.4319 - Val Accuracy: 0.9421



Training:   0%|          | 0/173 [00:00<?, ?it/s][A
Training:   1%|          | 1/173 [00:01<03:12,  1.12s/it][A
Training:   3%|▎         | 5/173 [00:01<00:56,  2.96it/s][A
Training:   5%|▌         | 9/173 [00:02<00:43,  3.78it/s][A
Training:   8%|▊         | 13/173 [00:03<00:38,  4.20it/s][A
Training:  10%|▉         | 17/173 [00:04<00:35,  4.42it/s][A
Training:  12%|█▏        | 21/173 [00:05<00:33,  4.55it/s][A
Training:  14%|█▍        | 25/173 [00:06<00:31,  4.67it/s][A
Training:  17%|█▋        | 29/173 [00:06<00:30,  4.78it/s][A
Training:  19%|█▉        | 33/173 [00:07<00:29,  4.83it/s][A
Training:  21%|██▏       | 37/173 [00:08<00:27,  4.89it/s][A
Training:  24%|██▎       | 41/173 [00:09<00:26,  4.95it/s][A
Training:  26%|██▌       | 45/173 [00:10<00:25,  4.96it/s][A
Training:  28%|██▊       | 49/173 [00:10<00:24,  4.99it/s][A
Training:  31%|███       | 53/173 [00:11<00:24,  4.97it/s][A
Training:  33%|███▎      | 57/173 [00:12<00:23,  4.97it/s][A
Training:  35%|███▌

Epoch: 39/41 - Loss: 4.8799 - Accuracy: 0.9569



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:23,  1.13s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.96it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.88it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.36it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.56it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.27it/s][A
Epochs:  95%|█████████▌| 39/41 [26:43<01:22, 41.06s/it]

Val Loss: 6.6190 - Val Accuracy: 0.9411



Training:   0%|          | 0/173 [00:00<?, ?it/s][A
Training:   1%|          | 1/173 [00:01<03:17,  1.15s/it][A
Training:   3%|▎         | 5/173 [00:01<00:58,  2.88it/s][A
Training:   5%|▌         | 9/173 [00:02<00:43,  3.77it/s][A
Training:   8%|▊         | 13/173 [00:03<00:38,  4.19it/s][A
Training:  10%|▉         | 17/173 [00:04<00:35,  4.37it/s][A
Training:  12%|█▏        | 21/173 [00:05<00:33,  4.54it/s][A
Training:  14%|█▍        | 25/173 [00:06<00:31,  4.67it/s][A
Training:  17%|█▋        | 29/173 [00:06<00:30,  4.77it/s][A
Training:  19%|█▉        | 33/173 [00:07<00:28,  4.86it/s][A
Training:  21%|██▏       | 37/173 [00:08<00:27,  4.88it/s][A
Training:  24%|██▎       | 41/173 [00:09<00:27,  4.85it/s][A
Training:  26%|██▌       | 45/173 [00:10<00:26,  4.85it/s][A
Training:  28%|██▊       | 49/173 [00:10<00:25,  4.86it/s][A
Training:  31%|███       | 53/173 [00:11<00:24,  4.89it/s][A
Training:  33%|███▎      | 57/173 [00:12<00:23,  4.85it/s][A
Training:  35%|███▌

Epoch: 40/41 - Loss: 4.7408 - Accuracy: 0.9579



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:23,  1.13s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.94it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.84it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.24it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.51it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.22it/s][A
Epochs:  98%|█████████▊| 40/41 [27:25<00:41, 41.13s/it]

Val Loss: 6.9704 - Val Accuracy: 0.9432



Training:   0%|          | 0/173 [00:00<?, ?it/s][A
Training:   1%|          | 1/173 [00:01<03:13,  1.13s/it][A
Training:   3%|▎         | 5/173 [00:01<00:56,  2.96it/s][A
Training:   5%|▌         | 9/173 [00:02<00:43,  3.79it/s][A
Training:   8%|▊         | 13/173 [00:03<00:37,  4.23it/s][A
Training:  10%|▉         | 17/173 [00:04<00:35,  4.43it/s][A
Training:  12%|█▏        | 21/173 [00:05<00:33,  4.60it/s][A
Training:  14%|█▍        | 25/173 [00:06<00:31,  4.70it/s][A
Training:  17%|█▋        | 29/173 [00:06<00:30,  4.77it/s][A
Training:  19%|█▉        | 33/173 [00:07<00:29,  4.81it/s][A
Training:  21%|██▏       | 37/173 [00:08<00:28,  4.83it/s][A
Training:  24%|██▎       | 41/173 [00:09<00:27,  4.84it/s][A
Training:  26%|██▌       | 45/173 [00:10<00:26,  4.87it/s][A
Training:  28%|██▊       | 49/173 [00:10<00:25,  4.89it/s][A
Training:  31%|███       | 53/173 [00:11<00:24,  4.92it/s][A
Training:  33%|███▎      | 57/173 [00:12<00:23,  4.91it/s][A
Training:  35%|███▌

Epoch: 41/41 - Loss: 4.7261 - Accuracy: 0.9587



  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:01<00:23,  1.10s/it][A
 23%|██▎       | 5/22 [00:01<00:05,  2.93it/s][A
 41%|████      | 9/22 [00:02<00:03,  3.80it/s][A
 59%|█████▉    | 13/22 [00:03<00:02,  4.23it/s][A
 77%|███████▋  | 17/22 [00:04<00:01,  4.47it/s][A
100%|██████████| 22/22 [00:05<00:00,  4.21it/s][A
Epochs: 100%|██████████| 41/41 [28:06<00:00, 41.13s/it]
[32m[I 2023-12-07 11:04:03,066][0m Trial 21 finished with value: 0.9471353888511658 and parameters: {'learning_rate': 0.0005392119649022928, 'weight_decay': 0.0008173502000658292, 'epsilon': 1.6821016245819212e-08, 'batch_size': 81, 'epochs': 41}. Best is trial 20 with value: 0.9507119655609131.[0m


Val Loss: 6.7797 - Val Accuracy: 0.9471
Learning rate: 0.0037564467363338933
Weight decay: 0.0006913946037521589
Epsilon: 1.383108474334189e-08
Batch size: 84
Number of epochs: 42


Epochs:   0%|          | 0/42 [00:00<?, ?it/s]
Training:   0%|          | 0/167 [00:00<?, ?it/s][A
Training:   1%|          | 1/167 [00:01<03:10,  1.15s/it][A
Training:   3%|▎         | 5/167 [00:01<00:56,  2.84it/s][A
Training:   5%|▌         | 9/167 [00:02<00:43,  3.64it/s][A
Training:   8%|▊         | 13/167 [00:03<00:38,  4.01it/s][A
Training:  10%|█         | 17/167 [00:04<00:35,  4.28it/s][A
Training:  13%|█▎        | 21/167 [00:05<00:33,  4.42it/s][A
Training:  15%|█▍        | 25/167 [00:06<00:31,  4.52it/s][A
Training:  17%|█▋        | 29/167 [00:07<00:30,  4.54it/s][A
Training:  20%|█▉        | 33/167 [00:07<00:29,  4.61it/s][A
Training:  22%|██▏       | 37/167 [00:08<00:27,  4.65it/s][A
Training:  25%|██▍       | 41/167 [00:09<00:26,  4.69it/s][A
Training:  27%|██▋       | 45/167 [00:10<00:25,  4.70it/s][A
Training:  29%|██▉       | 49/167 [00:11<00:24,  4.75it/s][A
Training:  32%|███▏      | 53/167 [00:12<00:23,  4.77it/s][A
Training:  34%|███▍      | 57/167 [

Epoch: 1/42 - Loss: 67.9668 - Accuracy: 0.8878



  0%|          | 0/21 [00:00<?, ?it/s][A
  5%|▍         | 1/21 [00:01<00:22,  1.15s/it][A
 24%|██▍       | 5/21 [00:02<00:05,  2.80it/s][A
 43%|████▎     | 9/21 [00:02<00:03,  3.62it/s][A
 62%|██████▏   | 13/21 [00:03<00:01,  4.06it/s][A
 81%|████████  | 17/21 [00:04<00:00,  4.31it/s][A
100%|██████████| 21/21 [00:05<00:00,  3.99it/s][A
Epochs:   2%|▏         | 1/42 [00:41<28:10, 41.23s/it]

Val Loss: 113.1539 - Val Accuracy: 0.9248



Training:   0%|          | 0/167 [00:00<?, ?it/s][A
Training:   1%|          | 1/167 [00:01<03:14,  1.17s/it][A
Training:   3%|▎         | 5/167 [00:02<00:57,  2.82it/s][A
Training:   5%|▌         | 9/167 [00:02<00:43,  3.62it/s][A
Training:   8%|▊         | 13/167 [00:03<00:38,  4.04it/s][A
Training:  10%|█         | 17/167 [00:04<00:34,  4.32it/s][A
Training:  12%|█▏        | 20/167 [00:04<00:25,  5.75it/s][A
Training:  13%|█▎        | 22/167 [00:05<00:32,  4.47it/s][A
Training:  15%|█▍        | 25/167 [00:06<00:33,  4.22it/s][A
Training:  17%|█▋        | 29/167 [00:07<00:31,  4.41it/s][A
Training:  20%|█▉        | 33/167 [00:07<00:29,  4.52it/s][A
Training:  22%|██▏       | 36/167 [00:08<00:22,  5.88it/s][A
Training:  23%|██▎       | 38/167 [00:08<00:27,  4.61it/s][A
Training:  25%|██▍       | 41/167 [00:09<00:28,  4.38it/s][A
Training:  26%|██▋       | 44/167 [00:09<00:20,  5.88it/s][A
Training:  28%|██▊       | 46/167 [00:10<00:27,  4.42it/s][A
Training:  29%|██▉ 

Epoch: 2/42 - Loss: 121.7133 - Accuracy: 0.9148



  0%|          | 0/21 [00:00<?, ?it/s][A
  5%|▍         | 1/21 [00:01<00:22,  1.12s/it][A
 24%|██▍       | 5/21 [00:01<00:05,  2.91it/s][A
 43%|████▎     | 9/21 [00:02<00:03,  3.70it/s][A
 62%|██████▏   | 13/21 [00:03<00:01,  4.08it/s][A
 81%|████████  | 17/21 [00:04<00:00,  4.34it/s][A
100%|██████████| 21/21 [00:05<00:00,  4.06it/s][A
Epochs:   2%|▏         | 1/42 [01:22<56:15, 82.32s/it]
[32m[I 2023-12-07 11:05:25,605][0m Trial 22 pruned. [0m


Val Loss: 147.6626 - Val Accuracy: 0.8919
Learning rate: 0.0007509460686905558
Weight decay: 0.0002543029701233671
Epsilon: 1.9482470664257024e-08
Batch size: 73
Number of epochs: 69


Epochs:   0%|          | 0/69 [00:00<?, ?it/s]
Training:   0%|          | 0/192 [00:00<?, ?it/s][A
Training:   1%|          | 1/192 [00:01<03:18,  1.04s/it][A
Training:   3%|▎         | 5/192 [00:01<00:58,  3.19it/s][A
Training:   5%|▍         | 9/192 [00:02<00:44,  4.10it/s][A
Training:   7%|▋         | 13/192 [00:03<00:39,  4.58it/s][A
Training:   8%|▊         | 16/192 [00:03<00:28,  6.27it/s][A
Training:   9%|▉         | 18/192 [00:04<00:36,  4.83it/s][A
Training:  11%|█         | 21/192 [00:04<00:36,  4.73it/s][A
Training:  12%|█▎        | 24/192 [00:04<00:26,  6.42it/s][A
Training:  14%|█▎        | 26/192 [00:05<00:32,  5.04it/s][A
Training:  15%|█▌        | 29/192 [00:06<00:33,  4.85it/s][A
Training:  16%|█▌        | 31/192 [00:06<00:27,  5.92it/s][A
Training:  17%|█▋        | 33/192 [00:06<00:32,  4.83it/s][A
Training:  19%|█▉        | 36/192 [00:07<00:22,  6.79it/s][A
Training:  20%|█▉        | 38/192 [00:07<00:30,  5.05it/s][A
Training:  21%|██▏       | 41/192 [

Epoch: 1/69 - Loss: 8.7591 - Accuracy: 0.8960



  0%|          | 0/24 [00:00<?, ?it/s][A
  4%|▍         | 1/24 [00:01<00:24,  1.06s/it][A
 21%|██        | 5/24 [00:01<00:06,  3.11it/s][A
 38%|███▊      | 9/24 [00:02<00:03,  4.05it/s][A
 54%|█████▍    | 13/24 [00:03<00:02,  4.55it/s][A
 71%|███████   | 17/24 [00:04<00:01,  4.88it/s][A
100%|██████████| 24/24 [00:04<00:00,  4.90it/s][A
Epochs:   1%|▏         | 1/69 [00:41<46:39, 41.17s/it]

Val Loss: 7.3327 - Val Accuracy: 0.9305



Training:   0%|          | 0/192 [00:00<?, ?it/s][A
Training:   1%|          | 1/192 [00:01<03:28,  1.09s/it][A
Training:   3%|▎         | 5/192 [00:01<01:01,  3.05it/s][A
Training:   5%|▍         | 9/192 [00:02<00:45,  3.99it/s][A
Training:   7%|▋         | 13/192 [00:03<00:40,  4.47it/s][A
Training:   9%|▉         | 17/192 [00:04<00:36,  4.78it/s][A
Training:  11%|█         | 21/192 [00:04<00:34,  4.96it/s][A
Training:  13%|█▎        | 25/192 [00:05<00:32,  5.10it/s][A
Training:  15%|█▌        | 29/192 [00:06<00:31,  5.19it/s][A
Training:  17%|█▋        | 33/192 [00:07<00:30,  5.26it/s][A
Training:  19%|█▉        | 37/192 [00:07<00:29,  5.28it/s][A
Training:  21%|██▏       | 41/192 [00:08<00:28,  5.28it/s][A
Training:  23%|██▎       | 45/192 [00:09<00:27,  5.30it/s][A
Training:  26%|██▌       | 49/192 [00:10<00:26,  5.37it/s][A
Training:  28%|██▊       | 53/192 [00:10<00:25,  5.39it/s][A
Training:  30%|██▉       | 57/192 [00:11<00:24,  5.45it/s][A
Training:  32%|███▏

Epoch: 2/69 - Loss: 7.2777 - Accuracy: 0.9361



  0%|          | 0/24 [00:00<?, ?it/s][A
  4%|▍         | 1/24 [00:01<00:23,  1.03s/it][A
 21%|██        | 5/24 [00:01<00:05,  3.22it/s][A
 38%|███▊      | 9/24 [00:02<00:03,  4.16it/s][A
 54%|█████▍    | 13/24 [00:03<00:02,  4.65it/s][A
 71%|███████   | 17/24 [00:03<00:01,  4.90it/s][A
100%|██████████| 24/24 [00:04<00:00,  5.01it/s][A
Epochs:   3%|▎         | 2/69 [01:21<45:42, 40.93s/it]

Val Loss: 7.4391 - Val Accuracy: 0.9329



Training:   0%|          | 0/192 [00:00<?, ?it/s][A
Training:   1%|          | 1/192 [00:01<03:22,  1.06s/it][A
Training:   3%|▎         | 5/192 [00:01<00:58,  3.22it/s][A
Training:   5%|▍         | 9/192 [00:02<00:44,  4.10it/s][A
Training:   7%|▋         | 13/192 [00:03<00:39,  4.56it/s][A
Training:   9%|▉         | 17/192 [00:04<00:36,  4.86it/s][A
Training:  11%|█         | 21/192 [00:04<00:33,  5.06it/s][A
Training:  13%|█▎        | 25/192 [00:05<00:32,  5.13it/s][A
Training:  15%|█▌        | 29/192 [00:06<00:31,  5.24it/s][A
Training:  17%|█▋        | 33/192 [00:06<00:29,  5.35it/s][A
Training:  19%|█▉        | 37/192 [00:07<00:28,  5.37it/s][A
Training:  21%|██▏       | 41/192 [00:08<00:28,  5.38it/s][A
Training:  23%|██▎       | 45/192 [00:09<00:27,  5.39it/s][A
Training:  26%|██▌       | 49/192 [00:09<00:26,  5.41it/s][A
Training:  28%|██▊       | 53/192 [00:10<00:25,  5.46it/s][A
Training:  30%|██▉       | 57/192 [00:11<00:24,  5.44it/s][A
Training:  32%|███▏

Epoch: 3/69 - Loss: 6.9911 - Accuracy: 0.9411



  0%|          | 0/24 [00:00<?, ?it/s][A
  4%|▍         | 1/24 [00:01<00:23,  1.02s/it][A
 21%|██        | 5/24 [00:01<00:05,  3.22it/s][A
 38%|███▊      | 9/24 [00:02<00:03,  4.22it/s][A
 54%|█████▍    | 13/24 [00:03<00:02,  4.70it/s][A
 71%|███████   | 17/24 [00:03<00:01,  4.99it/s][A
100%|██████████| 24/24 [00:04<00:00,  5.02it/s][A
Epochs:   3%|▎         | 2/69 [02:02<1:08:22, 61.23s/it]
[32m[I 2023-12-07 11:07:28,285][0m Trial 23 pruned. [0m


Val Loss: 8.0881 - Val Accuracy: 0.9339

Study statistics: 
  Number of finished trials:  24
  Number of pruned trials:  8
  Number of complete trials:  13


In [48]:
# Report the best trial found by the hyperparameter search: its objective
# value first, then every tuned parameter, one per line.
# NOTE(review): `study` comes from an earlier cell; the trial logs above suggest
# the objective value is the final validation accuracy — confirm.
print("Best trial:")
trial = study.best_trial  # kept under the name `trial` at notebook scope

print("  Value: ", trial.value)

print("  Params: ")
for param_name, param_value in trial.params.items():
    print("    {}: {}".format(param_name, param_value))

Best trial:
  Value:  0.9507119655609131
  Params: 
    batch_size: 83
    epochs: 44
    epsilon: 1.7002291132922094e-08
    learning_rate: 0.0005624769068457545
    weight_decay: 0.0021223853891047125


In [None]:
# ViT P8-S8 Triplet Mean

Best trial:
Value:  0.9507119655609131
Params: 
batch_size: 83
epochs: 44
epsilon: 1.7002291132922094e-08
learning_rate: 0.0005624769068457545
weight_decay: 0.0021223853891047125