# STAT4012 Project

- Implement residual network
- 4-fold cross validation
- Ensembling
- Add lr_scheduler

In [1]:
! nvidia-smi -L

GPU 0: NVIDIA GeForce RTX 3090 (UUID: GPU-72fdbd81-da45-b750-3719-ae5877e26726)


In [2]:
import multiprocessing as mp
num_cpu = mp.cpu_count()
num_cpu

12

In [3]:
_exp_name = "resnet"

In [4]:
# Import necessary packages.
import numpy as np
import pandas as pd
import torch
import os
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm
import random
import wandb

In [5]:
myseed = 4012  # set a random seed for reproducibility
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(myseed)
torch.manual_seed(myseed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(myseed)

## **Transforms**
Torchvision provides lots of useful utilities for image preprocessing, data wrapping as well as data augmentation.

Please refer to PyTorch official website for details about different transforms.

In [6]:
test_tfm = transforms.Compose([
    # (height = width = 128)
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
])

train_tfm = transforms.Compose([
    # (height = width = 128)
    #transforms.CenterCrop()
    transforms.RandomResizedCrop((128, 128), scale=(0.7, 1.0)),
    #transforms.AutoAugment(transforms.AutoAugmentPolicy.IMAGENET),
    #transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    transforms.RandomHorizontalFlip(0.5),
    transforms.RandomVerticalFlip(0.5),
    transforms.RandomRotation(180),
    transforms.RandomAffine(30),
    #transforms.RandomInvert(p=0.2),
    #transforms.RandomPosterize(bits=2),
    #transforms.RandomSolarize(threshold=192.0, p=0.2),
    #transforms.RandomEqualize(p=0.2),
    transforms.RandomGrayscale(p=0.2),
    transforms.ToTensor(),
    #transforms.RandomApply(torch.nn.ModuleList([]))
])


## **Datasets**
The data is labelled by the name, so we load images and label while calling '__getitem__'

In [7]:
class FoodDataset(Dataset):

    def __init__(self,path=None,tfm=test_tfm,files=None):
        super(FoodDataset).__init__()
        self.path = path
        if path:
            self.files = sorted([os.path.join(path, x) for x in os.listdir(path) if x.endswith(".jpg")])
        else:
            self.files = files
        self.transform = tfm
  
    def __len__(self):
        return len(self.files)
  
    def __getitem__(self,idx):
        fname = self.files[idx]
        im = Image.open(fname)
        im = self.transform(im)
        #im = self.data[idx]
        try:
            label = int(fname.split("/")[-1].split("_")[0])
        except:
            label = -1 # test has no label
        return im,label

In [8]:
class Residual_Block(nn.Module):
    def __init__(self, ic, oc, stride=1):
        # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
        # torch.nn.MaxPool2d(kernel_size, stride, padding)
        super().__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(ic, oc, kernel_size=3, stride=stride, padding=1),
            nn.BatchNorm2d(oc),
            nn.ReLU(inplace=True)
        )
        
        self.conv2 = nn.Sequential(
            nn.Conv2d(oc, oc, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(oc),
        )
        
        self.relu = nn.ReLU(inplace=True)
    
        self.downsample = None
        if stride != 1 or (ic != oc):
            self.downsample = nn.Sequential(
                nn.Conv2d(ic, oc, kernel_size=1, stride=stride),
                nn.BatchNorm2d(oc),
            )
        
    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.conv2(out)
        
        if self.downsample:
            residual = self.downsample(x)
            
        out += residual
        return self.relu(out)

class Classifier(nn.Module):
    def __init__(self, block, num_layers, num_classes=11):
        super().__init__()
        self.preconv = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
        )
        
        self.layer0 = self.make_residual(block, 32, 64,  num_layers[0], stride=2)
        self.layer1 = self.make_residual(block, 64, 128, num_layers[1], stride=2)
        self.layer2 = self.make_residual(block, 128, 256, num_layers[2], stride=2)
        self.layer3 = self.make_residual(block, 256, 512, num_layers[3], stride=2)
        
#         self.avgpool = nn.AvgPool2d(2)
        
        self.fc = nn.Sequential(            
            nn.Dropout(0.4),
            nn.Linear(512*4*4, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.2),
            nn.Linear(512, 11),
        )
        
        
    def make_residual(self, block, ic, oc, num_layer, stride=1):
        layers = []
        layers.append(block(ic, oc, stride))
        for i in range(1, num_layer):
            layers.append(block(oc, oc))
        return nn.Sequential(*layers)
    
    def forward(self, x):
        # [3, 128, 128]
        out = self.preconv(x)  # [32, 64, 64]
        out = self.layer0(out) # [64, 32, 32]
        out = self.layer1(out) # [128, 16, 16]
        out = self.layer2(out) # [256, 8, 8]
        out = self.layer3(out) # [512, 4, 4]
#         out = self.avgpool(out) # [512, 2, 2]
        out = self.fc(out.view(out.size(0), -1)) 
        return out

In [9]:
batch_size = 128
num_layers = [2, 3, 3, 1] # residual number layers

n_epochs = 300
patience = 20 # If no improvement in 'patience' epochs, early stop

k_fold = 4

In [10]:
train_dir = "./food-11/training"
val_dir = "./food-11/validation"

train_files = [os.path.join(train_dir, x) for x in os.listdir(train_dir) if x.endswith('.jpg')]
val_files = [os.path.join(val_dir, x) for x in os.listdir(val_dir) if x.endswith('.jpg')]
total_files = train_files + val_files
random.seed(myseed)
random.shuffle(total_files)

num = len(total_files) // k_fold
len(total_files)

13296

In [11]:
# "cuda" only when GPUs are available.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

test_fold = k_fold

for i in range(test_fold):
    fold = i+1
    print(f'\n\nStarting Fold: {fold} ********************************************')
    model = Classifier(Residual_Block, num_layers).to(device)
    print(next(model.parameters()).device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0004, weight_decay=1e-5) 
    scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=5, T_mult=2)
    stale = 0
    best_acc = 0
    
    val_data = total_files[i*num: (i+1)*num]
    train_data = total_files[:i*num] + total_files[(i+1)*num:]
    
    train_set = FoodDataset(tfm=train_tfm, files=train_data)
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=num_cpu, pin_memory=True)
    
    valid_set = FoodDataset(tfm=test_tfm, files=val_data)
    valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=True, num_workers=num_cpu, pin_memory=True)
    
    wandb.init(
        project="Food-11",
        config={
            "learning rate": 0.0004,
            "architecture": "CNN",
            "epochs": n_epochs,
            "batch_size": batch_size,
            "image_dim": 128,
            "T_0": 5,
            "T_mult": 2,
        }
    )

    for epoch in range(n_epochs):
    
        # ---------- Training ----------
        # Make sure the model is in train mode before training.
        model.train()
    
        # These are used to record information in training.
        train_loss = []
        train_accs = []
        lr = optimizer.param_groups[0]["lr"]
        
        pbar = tqdm(train_loader)
        pbar.set_description(f'T: {epoch+1:03d}/{n_epochs:03d}')
        for batch in pbar:
    
            # A batch consists of image data and corresponding labels.
            imgs, labels = batch
            #imgs = imgs.half()
            #print(imgs.shape,labels.shape)
    
            # Forward the data. (Make sure data and model are on the same device.)
            logits = model(imgs.to(device))
    
            # Calculate the cross-entropy loss.
            # We don't need to apply softmax before computing cross-entropy as it is done automatically.
            loss = criterion(logits, labels.to(device))
    
            # Gradients stored in the parameters in the previous step should be cleared out first.
            optimizer.zero_grad()
    
            # Compute the gradients for parameters.
            loss.backward()
    
            # Clip the gradient norms for stable training.
            grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)
    
            # Update the parameters with computed gradients.
            optimizer.step()
    
            # Compute the accuracy for current batch.
            acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()
    
            # Record the loss and accuracy.
            wandb.log({"step_training_loss": loss.item()})
            wandb.log({"step_training_accuracy": acc})
            train_loss.append(loss.item())
            train_accs.append(acc)
            pbar.set_postfix({'lr':lr, 'b_loss':loss.item(), 'b_acc':acc.item(),
                    'loss':sum(train_loss)/len(train_loss), 'acc': sum(train_accs).item()/len(train_accs)})
            
        train_loss = sum(train_loss) / len(train_loss)
        train_acc = sum(train_accs) / len(train_accs)
        # Print the information.
        print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")

        wandb.log({"learning rate": scheduler.get_last_lr()[0]})

        scheduler.step()
        
        
        # Make sure the model is in eval mode so that some modules like dropout are disabled and work normally.
        model.eval()
    
        # These are used to record information in validation.
        valid_loss = []
        valid_accs = []
    
        # Iterate the validation set by batches.
        pbar = tqdm(valid_loader)
        pbar.set_description(f'V: {epoch+1:03d}/{n_epochs:03d}')
        for batch in pbar:

            # A batch consists of image data and corresponding labels.
            imgs, labels = batch
            #imgs = imgs.half()
    
            # We don't need gradient in validation.
            # Using torch.no_grad() accelerates the forward process.
            with torch.no_grad():
                logits = model(imgs.to(device))
    
            # We can still compute the loss (but not the gradient).
            loss = criterion(logits, labels.to(device))
    
            # Compute the accuracy for current batch.
            acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()
    
            # Record the loss and accuracy.
            wandb.log({"step_validation_loss": loss.item()})
            wandb.log({"step_validation_accuracy": acc})
            valid_loss.append(loss.item())
            valid_accs.append(acc)
            pbar.set_postfix({'v_loss':sum(valid_loss)/len(valid_loss), 
                              'v_acc': sum(valid_accs).item()/len(valid_accs)})
        
            #break
    
        # The average loss and accuracy for entire validation set is the average of the recorded values.
        valid_loss = sum(valid_loss) / len(valid_loss)
        valid_acc = sum(valid_accs) / len(valid_accs)

        # Print the information.
        print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")

        # update logs
        if valid_acc > best_acc:
            with open(f"{_exp_name}_fold_{fold}_log.txt","a") as f:
                newline = '\n'
                item = f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f} -> best{newline}"
                f.write(item)
                print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f} -> best")
        else:
            with open(f"{_exp_name}_fold_{fold}_log.txt","a") as f:
                newline = '\n'
                item = f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}{newline}"
                f.write(item)
                print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")
        
        wandb.log({
            "average_training_loss": train_loss,
            "average_validation_loss": valid_loss,
        })

        # save models
        if valid_acc > best_acc:
            print(f"Best model found at fold {fold} epoch {epoch+1}, acc={valid_acc:.5f}, saving model")
            torch.save(model.state_dict(), f"Fold_{fold}_best.ckpt")
            # only save best to prevent output memory exceed error
            best_acc = valid_acc
            stale = 0
        else:
            stale += 1
            if stale > patience:
                print(f"No improvment {patience} consecutive epochs, early stopping")
                break
    wandb.finish()

cuda


Starting Fold: 1 ********************************************
cuda:0


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01666868954974537, max=1.0)…

T: 001/300: 100%|█| 78/78 [00:07<00:00,  9.76it/s, lr=0.0004, b_loss=1.97, b_acc


[ Train | 001/300 ] loss = 2.09193, acc = 0.26191


V: 001/300: 100%|███████| 26/26 [00:02<00:00, 12.05it/s, v_loss=2.31, v_acc=0.2]


[ Valid | 001/300 ] loss = 2.30717, acc = 0.19972
[ Valid | 001/300 ] loss = 2.30717, acc = 0.19972 -> best
Best model found at fold 1 epoch 1, acc=0.19972, saving model


T: 002/300: 100%|█| 78/78 [00:07<00:00, 10.46it/s, lr=0.000362, b_loss=1.9, b_ac


[ Train | 002/300 ] loss = 1.92494, acc = 0.32358


V: 002/300: 100%|█████| 26/26 [00:02<00:00, 11.51it/s, v_loss=1.87, v_acc=0.338]


[ Valid | 002/300 ] loss = 1.87200, acc = 0.33786
[ Valid | 002/300 ] loss = 1.87200, acc = 0.33786 -> best
Best model found at fold 1 epoch 2, acc=0.33786, saving model


T: 003/300: 100%|█| 78/78 [00:07<00:00, 10.14it/s, lr=0.000262, b_loss=1.76, b_a


[ Train | 003/300 ] loss = 1.82487, acc = 0.35949


V: 003/300: 100%|█████| 26/26 [00:01<00:00, 13.75it/s, v_loss=1.88, v_acc=0.332]


[ Valid | 003/300 ] loss = 1.87729, acc = 0.33246
[ Valid | 003/300 ] loss = 1.87729, acc = 0.33246


T: 004/300: 100%|█| 78/78 [00:07<00:00, 10.15it/s, lr=0.000138, b_loss=1.72, b_a


[ Train | 004/300 ] loss = 1.72907, acc = 0.39332


V: 004/300: 100%|██████| 26/26 [00:02<00:00, 11.69it/s, v_loss=1.7, v_acc=0.413]


[ Valid | 004/300 ] loss = 1.69759, acc = 0.41344
[ Valid | 004/300 ] loss = 1.69759, acc = 0.41344 -> best
Best model found at fold 1 epoch 4, acc=0.41344, saving model


T: 005/300: 100%|█| 78/78 [00:07<00:00, 10.13it/s, lr=3.82e-5, b_loss=1.79, b_ac


[ Train | 005/300 ] loss = 1.62711, acc = 0.43441


V: 005/300: 100%|█████| 26/26 [00:02<00:00, 11.95it/s, v_loss=1.53, v_acc=0.465]


[ Valid | 005/300 ] loss = 1.53081, acc = 0.46480
[ Valid | 005/300 ] loss = 1.53081, acc = 0.46480 -> best
Best model found at fold 1 epoch 5, acc=0.46480, saving model


T: 006/300: 100%|█| 78/78 [00:07<00:00, 10.00it/s, lr=0.0004, b_loss=1.54, b_acc


[ Train | 006/300 ] loss = 1.77232, acc = 0.38649


V: 006/300: 100%|█████| 26/26 [00:02<00:00, 12.56it/s, v_loss=1.73, v_acc=0.396]


[ Valid | 006/300 ] loss = 1.73256, acc = 0.39593
[ Valid | 006/300 ] loss = 1.73256, acc = 0.39593


T: 007/300: 100%|█| 78/78 [00:07<00:00, 10.26it/s, lr=0.00039, b_loss=1.75, b_ac


[ Train | 007/300 ] loss = 1.69749, acc = 0.40666


V: 007/300: 100%|█████| 26/26 [00:02<00:00, 11.74it/s, v_loss=1.73, v_acc=0.391]


[ Valid | 007/300 ] loss = 1.73400, acc = 0.39140
[ Valid | 007/300 ] loss = 1.73400, acc = 0.39140


T: 008/300: 100%|█| 78/78 [00:07<00:00, 10.15it/s, lr=0.000362, b_loss=1.69, b_a


[ Train | 008/300 ] loss = 1.63393, acc = 0.43453


V: 008/300: 100%|█████| 26/26 [00:01<00:00, 13.26it/s, v_loss=1.83, v_acc=0.375]


[ Valid | 008/300 ] loss = 1.83493, acc = 0.37460
[ Valid | 008/300 ] loss = 1.83493, acc = 0.37460


T: 009/300: 100%|█| 78/78 [00:07<00:00, 10.32it/s, lr=0.000318, b_loss=1.76, b_a


[ Train | 009/300 ] loss = 1.56334, acc = 0.45394


V: 009/300: 100%|█████| 26/26 [00:01<00:00, 14.08it/s, v_loss=1.93, v_acc=0.392]


[ Valid | 009/300 ] loss = 1.92789, acc = 0.39172
[ Valid | 009/300 ] loss = 1.92789, acc = 0.39172


T: 010/300: 100%|█| 78/78 [00:07<00:00, 10.22it/s, lr=0.000262, b_loss=1.53, b_a


[ Train | 010/300 ] loss = 1.48531, acc = 0.48468


V: 010/300: 100%|█████| 26/26 [00:02<00:00, 12.37it/s, v_loss=1.77, v_acc=0.399]


[ Valid | 010/300 ] loss = 1.76590, acc = 0.39916
[ Valid | 010/300 ] loss = 1.76590, acc = 0.39916


T: 011/300: 100%|█| 78/78 [00:07<00:00, 10.03it/s, lr=0.0002, b_loss=1.37, b_acc


[ Train | 011/300 ] loss = 1.43067, acc = 0.50070


V: 011/300: 100%|█████| 26/26 [00:02<00:00, 12.91it/s, v_loss=1.49, v_acc=0.499]


[ Valid | 011/300 ] loss = 1.49334, acc = 0.49933
[ Valid | 011/300 ] loss = 1.49334, acc = 0.49933 -> best
Best model found at fold 1 epoch 11, acc=0.49933, saving model


T: 012/300:   0%|                                        | 0/78 [00:00<?, ?it/s]wandb: Network error (ConnectTimeout), entering retry loop.
T: 012/300: 100%|█| 78/78 [00:07<00:00, 10.11it/s, lr=0.000138, b_loss=1.37, b_a


[ Train | 012/300 ] loss = 1.34853, acc = 0.53212


V: 012/300: 100%|█████| 26/26 [00:02<00:00, 11.19it/s, v_loss=1.34, v_acc=0.525]


[ Valid | 012/300 ] loss = 1.34247, acc = 0.52531
[ Valid | 012/300 ] loss = 1.34247, acc = 0.52531 -> best
Best model found at fold 1 epoch 12, acc=0.52531, saving model


T: 013/300: 100%|█| 78/78 [00:07<00:00, 10.56it/s, lr=8.24e-5, b_loss=1.38, b_ac


[ Train | 013/300 ] loss = 1.28662, acc = 0.55622


V: 013/300: 100%|█████| 26/26 [00:02<00:00, 12.12it/s, v_loss=1.37, v_acc=0.524]


[ Valid | 013/300 ] loss = 1.37485, acc = 0.52442
[ Valid | 013/300 ] loss = 1.37485, acc = 0.52442


T: 014/300: 100%|█| 78/78 [00:07<00:00, 10.25it/s, lr=3.82e-5, b_loss=1.17, b_ac


[ Train | 014/300 ] loss = 1.24964, acc = 0.56681


V: 014/300: 100%|█████| 26/26 [00:01<00:00, 13.28it/s, v_loss=1.23, v_acc=0.579]


[ Valid | 014/300 ] loss = 1.22618, acc = 0.57949
[ Valid | 014/300 ] loss = 1.22618, acc = 0.57949 -> best
Best model found at fold 1 epoch 14, acc=0.57949, saving model


T: 015/300: 100%|█| 78/78 [00:07<00:00, 10.23it/s, lr=9.79e-6, b_loss=1.17, b_ac


[ Train | 015/300 ] loss = 1.21088, acc = 0.58042


V: 015/300: 100%|█████| 26/26 [00:01<00:00, 14.03it/s, v_loss=1.17, v_acc=0.589]


[ Valid | 015/300 ] loss = 1.17272, acc = 0.58896
[ Valid | 015/300 ] loss = 1.17272, acc = 0.58896 -> best
Best model found at fold 1 epoch 15, acc=0.58896, saving model


T: 016/300: 100%|█| 78/78 [00:07<00:00, 10.55it/s, lr=0.0004, b_loss=1.36, b_acc


[ Train | 016/300 ] loss = 1.43168, acc = 0.50632


V: 016/300: 100%|█████| 26/26 [00:01<00:00, 13.37it/s, v_loss=1.68, v_acc=0.427]


[ Valid | 016/300 ] loss = 1.68242, acc = 0.42654
[ Valid | 016/300 ] loss = 1.68242, acc = 0.42654


T: 017/300: 100%|█| 78/78 [00:07<00:00, 10.33it/s, lr=0.000398, b_loss=1.45, b_a


[ Train | 017/300 ] loss = 1.40856, acc = 0.51057


V: 017/300: 100%|█████| 26/26 [00:02<00:00, 12.93it/s, v_loss=1.62, v_acc=0.452]


[ Valid | 017/300 ] loss = 1.62366, acc = 0.45165
[ Valid | 017/300 ] loss = 1.62366, acc = 0.45165


T: 018/300: 100%|█| 78/78 [00:07<00:00, 10.21it/s, lr=0.00039, b_loss=1.24, b_ac


[ Train | 018/300 ] loss = 1.36921, acc = 0.52391


V: 018/300: 100%|██████| 26/26 [00:02<00:00, 11.31it/s, v_loss=1.7, v_acc=0.446]


[ Valid | 018/300 ] loss = 1.69753, acc = 0.44587
[ Valid | 018/300 ] loss = 1.69753, acc = 0.44587


T: 019/300: 100%|█| 78/78 [00:07<00:00, 10.44it/s, lr=0.000378, b_loss=1.42, b_a


[ Train | 019/300 ] loss = 1.31689, acc = 0.54453


V: 019/300: 100%|██████| 26/26 [00:02<00:00, 12.54it/s, v_loss=1.53, v_acc=0.48]


[ Valid | 019/300 ] loss = 1.53114, acc = 0.47954
[ Valid | 019/300 ] loss = 1.53114, acc = 0.47954


T: 020/300: 100%|█| 78/78 [00:07<00:00, 10.10it/s, lr=0.000362, b_loss=1.43, b_a


[ Train | 020/300 ] loss = 1.27749, acc = 0.56067


V: 020/300: 100%|█████| 26/26 [00:02<00:00, 12.60it/s, v_loss=1.28, v_acc=0.557]


[ Valid | 020/300 ] loss = 1.28217, acc = 0.55685
[ Valid | 020/300 ] loss = 1.28217, acc = 0.55685


T: 021/300: 100%|█| 78/78 [00:07<00:00,  9.99it/s, lr=0.000341, b_loss=1.09, b_a


[ Train | 021/300 ] loss = 1.23158, acc = 0.57516


V: 021/300: 100%|█████| 26/26 [00:01<00:00, 13.81it/s, v_loss=1.42, v_acc=0.521]


[ Valid | 021/300 ] loss = 1.42088, acc = 0.52134
[ Valid | 021/300 ] loss = 1.42088, acc = 0.52134


T: 022/300: 100%|█| 78/78 [00:07<00:00, 10.38it/s, lr=0.000318, b_loss=1.07, b_a


[ Train | 022/300 ] loss = 1.19007, acc = 0.59021


V: 022/300: 100%|█████| 26/26 [00:01<00:00, 14.13it/s, v_loss=1.47, v_acc=0.517]


[ Valid | 022/300 ] loss = 1.46667, acc = 0.51744
[ Valid | 022/300 ] loss = 1.46667, acc = 0.51744


T: 023/300: 100%|█| 78/78 [00:07<00:00, 10.27it/s, lr=0.000291, b_loss=1.2, b_ac


[ Train | 023/300 ] loss = 1.15218, acc = 0.59950


V: 023/300: 100%|█████| 26/26 [00:02<00:00, 12.46it/s, v_loss=1.39, v_acc=0.535]


[ Valid | 023/300 ] loss = 1.39462, acc = 0.53457
[ Valid | 023/300 ] loss = 1.39462, acc = 0.53457


T: 024/300: 100%|█| 78/78 [00:07<00:00, 10.05it/s, lr=0.000262, b_loss=1.07, b_a


[ Train | 024/300 ] loss = 1.11876, acc = 0.61686


V: 024/300: 100%|█████| 26/26 [00:02<00:00, 11.25it/s, v_loss=1.16, v_acc=0.611]


[ Valid | 024/300 ] loss = 1.16306, acc = 0.61073
[ Valid | 024/300 ] loss = 1.16306, acc = 0.61073 -> best
Best model found at fold 1 epoch 24, acc=0.61073, saving model


T: 025/300: 100%|█| 78/78 [00:07<00:00, 10.17it/s, lr=0.000231, b_loss=1.03, b_a


[ Train | 025/300 ] loss = 1.08511, acc = 0.62523


V: 025/300: 100%|██████| 26/26 [00:01<00:00, 13.28it/s, v_loss=1.37, v_acc=0.53]


[ Valid | 025/300 ] loss = 1.36654, acc = 0.53043
[ Valid | 025/300 ] loss = 1.36654, acc = 0.53043


T: 026/300: 100%|█| 78/78 [00:07<00:00, 10.35it/s, lr=0.0002, b_loss=1.1, b_acc=


[ Train | 026/300 ] loss = 1.04931, acc = 0.63614


V: 026/300: 100%|█████| 26/26 [00:02<00:00, 12.95it/s, v_loss=1.28, v_acc=0.573]


[ Valid | 026/300 ] loss = 1.27565, acc = 0.57336
[ Valid | 026/300 ] loss = 1.27565, acc = 0.57336


T: 027/300: 100%|█| 78/78 [00:07<00:00, 10.15it/s, lr=0.000169, b_loss=1.03, b_a


[ Train | 027/300 ] loss = 1.03228, acc = 0.64626


V: 027/300: 100%|█████| 26/26 [00:01<00:00, 13.41it/s, v_loss=1.14, v_acc=0.621]


[ Valid | 027/300 ] loss = 1.14373, acc = 0.62060
[ Valid | 027/300 ] loss = 1.14373, acc = 0.62060 -> best
Best model found at fold 1 epoch 27, acc=0.62060, saving model


T: 028/300: 100%|█| 78/78 [00:07<00:00, 10.26it/s, lr=0.000138, b_loss=1.13, b_a


[ Train | 028/300 ] loss = 0.98616, acc = 0.65911


V: 028/300: 100%|██████| 26/26 [00:02<00:00, 12.01it/s, v_loss=1.1, v_acc=0.632]


[ Valid | 028/300 ] loss = 1.09774, acc = 0.63202
[ Valid | 028/300 ] loss = 1.09774, acc = 0.63202 -> best
Best model found at fold 1 epoch 28, acc=0.63202, saving model


T: 029/300: 100%|█| 78/78 [00:07<00:00, 10.21it/s, lr=0.000109, b_loss=0.903, b_


[ Train | 029/300 ] loss = 0.96479, acc = 0.66921


V: 029/300: 100%|████████| 26/26 [00:01<00:00, 13.48it/s, v_loss=1, v_acc=0.661]


[ Valid | 029/300 ] loss = 1.00230, acc = 0.66120
[ Valid | 029/300 ] loss = 1.00230, acc = 0.66120 -> best
Best model found at fold 1 epoch 29, acc=0.66120, saving model


T: 030/300: 100%|█| 78/78 [00:07<00:00, 10.40it/s, lr=8.24e-5, b_loss=0.944, b_a


[ Train | 030/300 ] loss = 0.93280, acc = 0.68315


V: 030/300: 100%|████| 26/26 [00:02<00:00, 12.25it/s, v_loss=0.951, v_acc=0.682]


[ Valid | 030/300 ] loss = 0.95081, acc = 0.68199
[ Valid | 030/300 ] loss = 0.95081, acc = 0.68199 -> best
Best model found at fold 1 epoch 30, acc=0.68199, saving model


T: 031/300: 100%|█| 78/78 [00:07<00:00, 10.12it/s, lr=5.86e-5, b_loss=0.797, b_a


[ Train | 031/300 ] loss = 0.90257, acc = 0.68849


V: 031/300: 100%|████| 26/26 [00:01<00:00, 13.13it/s, v_loss=0.942, v_acc=0.679]


[ Valid | 031/300 ] loss = 0.94214, acc = 0.67898
[ Valid | 031/300 ] loss = 0.94214, acc = 0.67898


T: 032/300: 100%|█| 78/78 [00:07<00:00, 10.41it/s, lr=3.82e-5, b_loss=0.936, b_a


[ Train | 032/300 ] loss = 0.88219, acc = 0.69621


V: 032/300: 100%|████| 26/26 [00:02<00:00, 11.99it/s, v_loss=0.926, v_acc=0.693]


[ Valid | 032/300 ] loss = 0.92577, acc = 0.69258
[ Valid | 032/300 ] loss = 0.92577, acc = 0.69258 -> best
Best model found at fold 1 epoch 32, acc=0.69258, saving model


T: 033/300: 100%|█| 78/78 [00:07<00:00, 10.45it/s, lr=2.18e-5, b_loss=0.738, b_a


[ Train | 033/300 ] loss = 0.87063, acc = 0.70381


V: 033/300: 100%|████| 26/26 [00:02<00:00, 11.56it/s, v_loss=0.908, v_acc=0.691]


[ Valid | 033/300 ] loss = 0.90837, acc = 0.69137
[ Valid | 033/300 ] loss = 0.90837, acc = 0.69137


T: 034/300: 100%|█| 78/78 [00:07<00:00, 10.40it/s, lr=9.79e-6, b_loss=0.909, b_a


[ Train | 034/300 ] loss = 0.86158, acc = 0.70084


V: 034/300: 100%|██████| 26/26 [00:01<00:00, 13.14it/s, v_loss=0.901, v_acc=0.7]


[ Valid | 034/300 ] loss = 0.90060, acc = 0.69977
[ Valid | 034/300 ] loss = 0.90060, acc = 0.69977 -> best
Best model found at fold 1 epoch 34, acc=0.69977, saving model


T: 035/300: 100%|█| 78/78 [00:07<00:00, 10.48it/s, lr=2.46e-6, b_loss=0.853, b_a


[ Train | 035/300 ] loss = 0.84954, acc = 0.70808


V: 035/300: 100%|████| 26/26 [00:01<00:00, 14.46it/s, v_loss=0.895, v_acc=0.702]


[ Valid | 035/300 ] loss = 0.89466, acc = 0.70216
[ Valid | 035/300 ] loss = 0.89466, acc = 0.70216 -> best
Best model found at fold 1 epoch 35, acc=0.70216, saving model


T: 036/300: 100%|█| 78/78 [00:07<00:00, 10.31it/s, lr=0.0004, b_loss=1.12, b_acc


[ Train | 036/300 ] loss = 1.11939, acc = 0.61338


V: 036/300: 100%|██████| 26/26 [00:02<00:00, 12.79it/s, v_loss=1.9, v_acc=0.439]


[ Valid | 036/300 ] loss = 1.89551, acc = 0.43922
[ Valid | 036/300 ] loss = 1.89551, acc = 0.43922


T: 037/300: 100%|█| 78/78 [00:07<00:00, 10.22it/s, lr=0.000399, b_loss=1.04, b_a


[ Train | 037/300 ] loss = 1.09601, acc = 0.62629


V: 037/300: 100%|█████| 26/26 [00:01<00:00, 13.13it/s, v_loss=1.32, v_acc=0.548]


[ Valid | 037/300 ] loss = 1.31944, acc = 0.54751
[ Valid | 037/300 ] loss = 1.31944, acc = 0.54751


T: 038/300: 100%|█| 78/78 [00:07<00:00, 10.56it/s, lr=0.000398, b_loss=1.07, b_a


[ Train | 038/300 ] loss = 1.07509, acc = 0.63084


V: 038/300: 100%|█████| 26/26 [00:02<00:00, 11.57it/s, v_loss=1.19, v_acc=0.607]


[ Valid | 038/300 ] loss = 1.18749, acc = 0.60737
[ Valid | 038/300 ] loss = 1.18749, acc = 0.60737


T: 039/300: 100%|█| 78/78 [00:07<00:00, 10.57it/s, lr=0.000394, b_loss=1.05, b_a


[ Train | 039/300 ] loss = 1.05487, acc = 0.63113


V: 039/300: 100%|█████| 26/26 [00:02<00:00, 12.67it/s, v_loss=1.11, v_acc=0.624]


[ Valid | 039/300 ] loss = 1.10575, acc = 0.62434
[ Valid | 039/300 ] loss = 1.10575, acc = 0.62434


T: 040/300: 100%|█| 78/78 [00:07<00:00, 10.18it/s, lr=0.00039, b_loss=1.04, b_ac


[ Train | 040/300 ] loss = 1.04252, acc = 0.64384


V: 040/300: 100%|█████| 26/26 [00:02<00:00, 11.61it/s, v_loss=1.34, v_acc=0.568]


[ Valid | 040/300 ] loss = 1.33609, acc = 0.56804
[ Valid | 040/300 ] loss = 1.33609, acc = 0.56804


T: 041/300: 100%|█| 78/78 [00:07<00:00, 10.04it/s, lr=0.000385, b_loss=1.22, b_a


[ Train | 041/300 ] loss = 1.02881, acc = 0.63953


V: 041/300: 100%|█████| 26/26 [00:02<00:00, 12.78it/s, v_loss=1.28, v_acc=0.585]


[ Valid | 041/300 ] loss = 1.28379, acc = 0.58454
[ Valid | 041/300 ] loss = 1.28379, acc = 0.58454


T: 042/300: 100%|█| 78/78 [00:07<00:00, 10.22it/s, lr=0.000378, b_loss=1.03, b_a


[ Train | 042/300 ] loss = 0.99942, acc = 0.65353


V: 042/300: 100%|█████| 26/26 [00:01<00:00, 13.73it/s, v_loss=1.14, v_acc=0.618]


[ Valid | 042/300 ] loss = 1.13918, acc = 0.61821
[ Valid | 042/300 ] loss = 1.13918, acc = 0.61821


T: 043/300: 100%|█| 78/78 [00:07<00:00, 10.48it/s, lr=0.000371, b_loss=0.933, b_


[ Train | 043/300 ] loss = 0.98229, acc = 0.66478


V: 043/300: 100%|█████| 26/26 [00:02<00:00, 12.42it/s, v_loss=1.13, v_acc=0.619]


[ Valid | 043/300 ] loss = 1.13361, acc = 0.61880
[ Valid | 043/300 ] loss = 1.13361, acc = 0.61880


T: 044/300: 100%|█| 78/78 [00:07<00:00, 10.40it/s, lr=0.000362, b_loss=1, b_acc=


[ Train | 044/300 ] loss = 0.95799, acc = 0.66996


V: 044/300: 100%|██████| 26/26 [00:02<00:00, 10.27it/s, v_loss=1.03, v_acc=0.65]


[ Valid | 044/300 ] loss = 1.02883, acc = 0.64957
[ Valid | 044/300 ] loss = 1.02883, acc = 0.64957


T: 045/300: 100%|█| 78/78 [00:07<00:00, 10.49it/s, lr=0.000352, b_loss=1.15, b_a


[ Train | 045/300 ] loss = 0.94668, acc = 0.66850


V: 045/300: 100%|█████| 26/26 [00:02<00:00, 12.43it/s, v_loss=1.71, v_acc=0.496]


[ Valid | 045/300 ] loss = 1.71489, acc = 0.49647
[ Valid | 045/300 ] loss = 1.71489, acc = 0.49647


T: 046/300: 100%|█| 78/78 [00:07<00:00, 10.54it/s, lr=0.000341, b_loss=0.931, b_


[ Train | 046/300 ] loss = 0.93715, acc = 0.68116


V: 046/300: 100%|█████| 26/26 [00:02<00:00, 12.82it/s, v_loss=1.17, v_acc=0.609]


[ Valid | 046/300 ] loss = 1.17419, acc = 0.60864
[ Valid | 046/300 ] loss = 1.17419, acc = 0.60864


T: 047/300: 100%|█| 78/78 [00:07<00:00, 10.41it/s, lr=0.00033, b_loss=0.895, b_a


[ Train | 047/300 ] loss = 0.90707, acc = 0.68823


V: 047/300: 100%|████| 26/26 [00:01<00:00, 13.97it/s, v_loss=0.974, v_acc=0.676]


[ Valid | 047/300 ] loss = 0.97400, acc = 0.67599
[ Valid | 047/300 ] loss = 0.97400, acc = 0.67599


T: 048/300: 100%|█| 78/78 [00:07<00:00, 10.17it/s, lr=0.000318, b_loss=0.959, b_


[ Train | 048/300 ] loss = 0.90370, acc = 0.68757


V: 048/300: 100%|█████| 26/26 [00:02<00:00, 12.51it/s, v_loss=1.17, v_acc=0.619]


[ Valid | 048/300 ] loss = 1.16678, acc = 0.61916
[ Valid | 048/300 ] loss = 1.16678, acc = 0.61916


T: 049/300: 100%|█| 78/78 [00:07<00:00, 10.37it/s, lr=0.000304, b_loss=0.723, b_


[ Train | 049/300 ] loss = 0.86867, acc = 0.70086


V: 049/300: 100%|████| 26/26 [00:02<00:00, 12.97it/s, v_loss=0.955, v_acc=0.673]


[ Valid | 049/300 ] loss = 0.95510, acc = 0.67266
[ Valid | 049/300 ] loss = 0.95510, acc = 0.67266


T: 050/300: 100%|█| 78/78 [00:07<00:00, 10.05it/s, lr=0.000291, b_loss=0.865, b_


[ Train | 050/300 ] loss = 0.86370, acc = 0.70165


V: 050/300: 100%|█████| 26/26 [00:02<00:00, 12.27it/s, v_loss=1.02, v_acc=0.666]


[ Valid | 050/300 ] loss = 1.01655, acc = 0.66600
[ Valid | 050/300 ] loss = 1.01655, acc = 0.66600


T: 051/300: 100%|█| 78/78 [00:07<00:00, 10.10it/s, lr=0.000277, b_loss=0.764, b_


[ Train | 051/300 ] loss = 0.83602, acc = 0.71543


V: 051/300: 100%|█████| 26/26 [00:02<00:00, 12.34it/s, v_loss=1.05, v_acc=0.647]


[ Valid | 051/300 ] loss = 1.05431, acc = 0.64708
[ Valid | 051/300 ] loss = 1.05431, acc = 0.64708


T: 052/300: 100%|█| 78/78 [00:07<00:00, 10.10it/s, lr=0.000262, b_loss=0.86, b_a


[ Train | 052/300 ] loss = 0.82382, acc = 0.71859


V: 052/300: 100%|█████| 26/26 [00:01<00:00, 13.11it/s, v_loss=1.25, v_acc=0.602]


[ Valid | 052/300 ] loss = 1.25140, acc = 0.60159
[ Valid | 052/300 ] loss = 1.25140, acc = 0.60159


T: 053/300: 100%|█| 78/78 [00:07<00:00, 10.15it/s, lr=0.000247, b_loss=0.848, b_


[ Train | 053/300 ] loss = 0.81673, acc = 0.72121


V: 053/300: 100%|█████| 26/26 [00:02<00:00, 12.34it/s, v_loss=1.01, v_acc=0.663]


[ Valid | 053/300 ] loss = 1.00525, acc = 0.66262
[ Valid | 053/300 ] loss = 1.00525, acc = 0.66262


T: 054/300: 100%|█| 78/78 [00:07<00:00,  9.82it/s, lr=0.000231, b_loss=0.754, b_


[ Train | 054/300 ] loss = 0.80014, acc = 0.72755


V: 054/300: 100%|█████| 26/26 [00:02<00:00, 12.78it/s, v_loss=1.02, v_acc=0.667]


[ Valid | 054/300 ] loss = 1.02294, acc = 0.66716
[ Valid | 054/300 ] loss = 1.02294, acc = 0.66716


T: 055/300: 100%|█| 78/78 [00:07<00:00, 10.44it/s, lr=0.000216, b_loss=0.636, b_


[ Train | 055/300 ] loss = 0.77391, acc = 0.73314


V: 055/300: 100%|████| 26/26 [00:02<00:00, 11.89it/s, v_loss=0.875, v_acc=0.715]


[ Valid | 055/300 ] loss = 0.87548, acc = 0.71452
[ Valid | 055/300 ] loss = 0.87548, acc = 0.71452 -> best
Best model found at fold 1 epoch 55, acc=0.71452, saving model


T: 056/300: 100%|█| 78/78 [00:07<00:00, 10.31it/s, lr=0.0002, b_loss=0.707, b_ac


[ Train | 056/300 ] loss = 0.76757, acc = 0.73526


V: 056/300: 100%|████| 26/26 [00:02<00:00, 12.28it/s, v_loss=0.902, v_acc=0.708]


[ Valid | 056/300 ] loss = 0.90225, acc = 0.70756
[ Valid | 056/300 ] loss = 0.90225, acc = 0.70756


T: 057/300: 100%|█| 78/78 [00:07<00:00, 10.43it/s, lr=0.000184, b_loss=0.453, b_


[ Train | 057/300 ] loss = 0.75657, acc = 0.73612


V: 057/300: 100%|████████| 26/26 [00:01<00:00, 14.26it/s, v_loss=1, v_acc=0.668]


[ Valid | 057/300 ] loss = 1.00439, acc = 0.66846
[ Valid | 057/300 ] loss = 1.00439, acc = 0.66846


T: 058/300: 100%|█| 78/78 [00:07<00:00, 10.27it/s, lr=0.000169, b_loss=0.646, b_


[ Train | 058/300 ] loss = 0.71870, acc = 0.75593


V: 058/300: 100%|█████| 26/26 [00:02<00:00, 12.09it/s, v_loss=0.886, v_acc=0.71]


[ Valid | 058/300 ] loss = 0.88630, acc = 0.70998
[ Valid | 058/300 ] loss = 0.88630, acc = 0.70998


T: 059/300: 100%|█| 78/78 [00:07<00:00, 10.13it/s, lr=0.000153, b_loss=0.747, b_


[ Train | 059/300 ] loss = 0.70276, acc = 0.75508


V: 059/300: 100%|████| 26/26 [00:02<00:00, 12.91it/s, v_loss=0.886, v_acc=0.713]


[ Valid | 059/300 ] loss = 0.88609, acc = 0.71328
[ Valid | 059/300 ] loss = 0.88609, acc = 0.71328


T: 060/300: 100%|█| 78/78 [00:07<00:00, 10.18it/s, lr=0.000138, b_loss=0.857, b_


[ Train | 060/300 ] loss = 0.69146, acc = 0.76180


V: 060/300: 100%|████| 26/26 [00:02<00:00, 12.81it/s, v_loss=0.839, v_acc=0.716]


[ Valid | 060/300 ] loss = 0.83896, acc = 0.71604
[ Valid | 060/300 ] loss = 0.83896, acc = 0.71604 -> best
Best model found at fold 1 epoch 60, acc=0.71604, saving model


T: 061/300: 100%|█| 78/78 [00:08<00:00,  8.97it/s, lr=0.000123, b_loss=0.707, b_


[ Train | 061/300 ] loss = 0.65634, acc = 0.77593


V: 061/300: 100%|████| 26/26 [00:02<00:00, 11.02it/s, v_loss=0.817, v_acc=0.728]


[ Valid | 061/300 ] loss = 0.81732, acc = 0.72773
[ Valid | 061/300 ] loss = 0.81732, acc = 0.72773 -> best
Best model found at fold 1 epoch 61, acc=0.72773, saving model


T: 062/300: 100%|█| 78/78 [00:08<00:00,  8.97it/s, lr=0.000109, b_loss=0.759, b_


[ Train | 062/300 ] loss = 0.66081, acc = 0.77542


V: 062/300: 100%|████| 26/26 [00:02<00:00,  9.85it/s, v_loss=0.819, v_acc=0.731]


[ Valid | 062/300 ] loss = 0.81922, acc = 0.73109
[ Valid | 062/300 ] loss = 0.81922, acc = 0.73109 -> best
Best model found at fold 1 epoch 62, acc=0.73109, saving model


T: 063/300: 100%|█| 78/78 [00:08<00:00,  9.20it/s, lr=9.55e-5, b_loss=0.688, b_a


[ Train | 063/300 ] loss = 0.64359, acc = 0.77667


V: 063/300: 100%|█████| 26/26 [00:02<00:00, 11.22it/s, v_loss=0.82, v_acc=0.734]


[ Valid | 063/300 ] loss = 0.82010, acc = 0.73409
[ Valid | 063/300 ] loss = 0.82010, acc = 0.73409 -> best
Best model found at fold 1 epoch 63, acc=0.73409, saving model


T: 064/300: 100%|█| 78/78 [00:08<00:00,  9.32it/s, lr=8.24e-5, b_loss=0.668, b_a


[ Train | 064/300 ] loss = 0.63021, acc = 0.77838


V: 064/300: 100%|████| 26/26 [00:02<00:00, 12.73it/s, v_loss=0.803, v_acc=0.735]


[ Valid | 064/300 ] loss = 0.80305, acc = 0.73489
[ Valid | 064/300 ] loss = 0.80305, acc = 0.73489 -> best
Best model found at fold 1 epoch 64, acc=0.73489, saving model


T: 065/300: 100%|█| 78/78 [00:07<00:00, 10.30it/s, lr=7.01e-5, b_loss=0.559, b_a


[ Train | 065/300 ] loss = 0.62067, acc = 0.78673


V: 065/300: 100%|████| 26/26 [00:01<00:00, 13.73it/s, v_loss=0.769, v_acc=0.745]


[ Valid | 065/300 ] loss = 0.76917, acc = 0.74490
[ Valid | 065/300 ] loss = 0.76917, acc = 0.74490 -> best
Best model found at fold 1 epoch 65, acc=0.74490, saving model


T: 066/300: 100%|█| 78/78 [00:07<00:00, 10.31it/s, lr=5.86e-5, b_loss=0.61, b_ac


[ Train | 066/300 ] loss = 0.60230, acc = 0.79014


V: 066/300: 100%|█████| 26/26 [00:02<00:00, 12.96it/s, v_loss=0.78, v_acc=0.743]


[ Valid | 066/300 ] loss = 0.78037, acc = 0.74299
[ Valid | 066/300 ] loss = 0.78037, acc = 0.74299


T: 067/300: 100%|█| 78/78 [00:07<00:00,  9.90it/s, lr=4.79e-5, b_loss=0.624, b_a


[ Train | 067/300 ] loss = 0.59513, acc = 0.79329


V: 067/300: 100%|█████| 26/26 [00:01<00:00, 13.26it/s, v_loss=0.782, v_acc=0.75]


[ Valid | 067/300 ] loss = 0.78215, acc = 0.74999
[ Valid | 067/300 ] loss = 0.78215, acc = 0.74999 -> best
Best model found at fold 1 epoch 67, acc=0.74999, saving model


T: 068/300: 100%|█| 78/78 [00:07<00:00, 10.52it/s, lr=3.82e-5, b_loss=0.52, b_ac


[ Train | 068/300 ] loss = 0.57354, acc = 0.80638


V: 068/300: 100%|████| 26/26 [00:02<00:00, 11.79it/s, v_loss=0.758, v_acc=0.753]


[ Valid | 068/300 ] loss = 0.75756, acc = 0.75337
[ Valid | 068/300 ] loss = 0.75756, acc = 0.75337 -> best
Best model found at fold 1 epoch 68, acc=0.75337, saving model


T: 069/300: 100%|█| 78/78 [00:07<00:00, 10.20it/s, lr=2.95e-5, b_loss=0.596, b_a


[ Train | 069/300 ] loss = 0.57438, acc = 0.80195


V: 069/300: 100%|████| 26/26 [00:02<00:00, 11.84it/s, v_loss=0.769, v_acc=0.752]


[ Valid | 069/300 ] loss = 0.76873, acc = 0.75237
[ Valid | 069/300 ] loss = 0.76873, acc = 0.75237


T: 070/300: 100%|█| 78/78 [00:07<00:00, 10.05it/s, lr=2.18e-5, b_loss=0.632, b_a


[ Train | 070/300 ] loss = 0.55963, acc = 0.80227


V: 070/300: 100%|█████| 26/26 [00:01<00:00, 13.16it/s, v_loss=0.759, v_acc=0.75]


[ Valid | 070/300 ] loss = 0.75898, acc = 0.75026
[ Valid | 070/300 ] loss = 0.75898, acc = 0.75026


T: 071/300: 100%|█| 78/78 [00:07<00:00, 10.28it/s, lr=1.52e-5, b_loss=0.536, b_a


[ Train | 071/300 ] loss = 0.55991, acc = 0.80784


V: 071/300: 100%|████| 26/26 [00:02<00:00, 12.65it/s, v_loss=0.756, v_acc=0.752]


[ Valid | 071/300 ] loss = 0.75645, acc = 0.75182
[ Valid | 071/300 ] loss = 0.75645, acc = 0.75182


T: 072/300: 100%|█| 78/78 [00:07<00:00, 10.32it/s, lr=9.79e-6, b_loss=0.515, b_a


[ Train | 072/300 ] loss = 0.54605, acc = 0.81259


V: 072/300: 100%|████| 26/26 [00:02<00:00, 12.82it/s, v_loss=0.743, v_acc=0.755]


[ Valid | 072/300 ] loss = 0.74287, acc = 0.75540
[ Valid | 072/300 ] loss = 0.74287, acc = 0.75540 -> best
Best model found at fold 1 epoch 72, acc=0.75540, saving model


T: 073/300: 100%|█| 78/78 [00:07<00:00, 10.37it/s, lr=5.53e-6, b_loss=0.502, b_a


[ Train | 073/300 ] loss = 0.53646, acc = 0.81208


V: 073/300: 100%|████| 26/26 [00:02<00:00, 11.02it/s, v_loss=0.747, v_acc=0.757]


[ Valid | 073/300 ] loss = 0.74724, acc = 0.75661
[ Valid | 073/300 ] loss = 0.74724, acc = 0.75661 -> best
Best model found at fold 1 epoch 73, acc=0.75661, saving model


T: 074/300: 100%|█| 78/78 [00:07<00:00, 10.41it/s, lr=2.46e-6, b_loss=0.542, b_a


[ Train | 074/300 ] loss = 0.55035, acc = 0.80963


V: 074/300: 100%|████| 26/26 [00:01<00:00, 13.09it/s, v_loss=0.743, v_acc=0.757]


[ Valid | 074/300 ] loss = 0.74286, acc = 0.75717
[ Valid | 074/300 ] loss = 0.74286, acc = 0.75717 -> best
Best model found at fold 1 epoch 74, acc=0.75717, saving model


T: 075/300: 100%|█| 78/78 [00:07<00:00, 10.09it/s, lr=6.17e-7, b_loss=0.556, b_a


[ Train | 075/300 ] loss = 0.53623, acc = 0.81701


V: 075/300: 100%|████| 26/26 [00:02<00:00, 12.86it/s, v_loss=0.748, v_acc=0.756]


[ Valid | 075/300 ] loss = 0.74772, acc = 0.75628
[ Valid | 075/300 ] loss = 0.74772, acc = 0.75628


T: 076/300: 100%|█| 78/78 [00:07<00:00, 10.59it/s, lr=0.0004, b_loss=0.974, b_ac


[ Train | 076/300 ] loss = 0.83265, acc = 0.71719


V: 076/300: 100%|█████| 26/26 [00:02<00:00, 12.13it/s, v_loss=1.39, v_acc=0.586]


[ Valid | 076/300 ] loss = 1.39340, acc = 0.58635
[ Valid | 076/300 ] loss = 1.39340, acc = 0.58635


T: 077/300: 100%|█| 78/78 [00:07<00:00, 10.18it/s, lr=0.0004, b_loss=0.894, b_ac


[ Train | 077/300 ] loss = 0.83878, acc = 0.71081


V: 077/300: 100%|█████| 26/26 [00:02<00:00, 11.89it/s, v_loss=1.74, v_acc=0.504]


[ Valid | 077/300 ] loss = 1.74203, acc = 0.50394
[ Valid | 077/300 ] loss = 1.74203, acc = 0.50394


T: 078/300: 100%|█| 78/78 [00:07<00:00,  9.87it/s, lr=0.000399, b_loss=0.812, b_


[ Train | 078/300 ] loss = 0.80834, acc = 0.71780


V: 078/300: 100%|██████| 26/26 [00:02<00:00, 12.03it/s, v_loss=1.1, v_acc=0.643]


[ Valid | 078/300 ] loss = 1.10445, acc = 0.64347
[ Valid | 078/300 ] loss = 1.10445, acc = 0.64347


T: 079/300: 100%|█| 78/78 [00:07<00:00, 10.23it/s, lr=0.000399, b_loss=0.788, b_


[ Train | 079/300 ] loss = 0.79390, acc = 0.72629


V: 079/300: 100%|█████| 26/26 [00:02<00:00, 12.05it/s, v_loss=1.38, v_acc=0.584]


[ Valid | 079/300 ] loss = 1.38390, acc = 0.58445
[ Valid | 079/300 ] loss = 1.38390, acc = 0.58445


T: 080/300: 100%|█| 78/78 [00:07<00:00,  9.93it/s, lr=0.000398, b_loss=0.918, b_


[ Train | 080/300 ] loss = 0.78612, acc = 0.72853


V: 080/300: 100%|████| 26/26 [00:02<00:00, 12.11it/s, v_loss=0.977, v_acc=0.687]


[ Valid | 080/300 ] loss = 0.97743, acc = 0.68678
[ Valid | 080/300 ] loss = 0.97743, acc = 0.68678


T: 081/300: 100%|█| 78/78 [00:07<00:00, 10.18it/s, lr=0.000396, b_loss=0.782, b_


[ Train | 081/300 ] loss = 0.77802, acc = 0.72882


V: 081/300: 100%|██████| 26/26 [00:02<00:00, 12.62it/s, v_loss=1.09, v_acc=0.66]


[ Valid | 081/300 ] loss = 1.09046, acc = 0.66043
[ Valid | 081/300 ] loss = 1.09046, acc = 0.66043


T: 082/300: 100%|█| 78/78 [00:07<00:00, 10.16it/s, lr=0.000394, b_loss=0.714, b_


[ Train | 082/300 ] loss = 0.76176, acc = 0.73925


V: 082/300: 100%|████| 26/26 [00:02<00:00, 12.87it/s, v_loss=0.971, v_acc=0.683]


[ Valid | 082/300 ] loss = 0.97073, acc = 0.68290
[ Valid | 082/300 ] loss = 0.97073, acc = 0.68290


T: 083/300: 100%|█| 78/78 [00:07<00:00, 10.16it/s, lr=0.000392, b_loss=0.908, b_


[ Train | 083/300 ] loss = 0.75777, acc = 0.73878


V: 083/300: 100%|████| 26/26 [00:02<00:00, 11.49it/s, v_loss=0.975, v_acc=0.681]


[ Valid | 083/300 ] loss = 0.97472, acc = 0.68138
[ Valid | 083/300 ] loss = 0.97472, acc = 0.68138


T: 084/300: 100%|█| 78/78 [00:07<00:00, 10.11it/s, lr=0.00039, b_loss=0.57, b_ac


[ Train | 084/300 ] loss = 0.74370, acc = 0.74024


V: 084/300: 100%|█████| 26/26 [00:02<00:00, 11.38it/s, v_loss=1.21, v_acc=0.623]


[ Valid | 084/300 ] loss = 1.21010, acc = 0.62301
[ Valid | 084/300 ] loss = 1.21010, acc = 0.62301


T: 085/300: 100%|█| 78/78 [00:07<00:00,  9.99it/s, lr=0.000388, b_loss=0.647, b_


[ Train | 085/300 ] loss = 0.74910, acc = 0.74018


V: 085/300: 100%|█████| 26/26 [00:02<00:00, 11.23it/s, v_loss=1.26, v_acc=0.608]


[ Valid | 085/300 ] loss = 1.25515, acc = 0.60762
[ Valid | 085/300 ] loss = 1.25515, acc = 0.60762


T: 086/300: 100%|█| 78/78 [00:07<00:00,  9.91it/s, lr=0.000385, b_loss=0.877, b_


[ Train | 086/300 ] loss = 0.73460, acc = 0.74816


V: 086/300: 100%|██████| 26/26 [00:01<00:00, 13.00it/s, v_loss=1.3, v_acc=0.603]


[ Valid | 086/300 ] loss = 1.29704, acc = 0.60349
[ Valid | 086/300 ] loss = 1.29704, acc = 0.60349


T: 087/300: 100%|█| 78/78 [00:07<00:00, 10.24it/s, lr=0.000382, b_loss=0.8, b_ac


[ Train | 087/300 ] loss = 0.71399, acc = 0.75490


V: 087/300: 100%|█████| 26/26 [00:01<00:00, 13.41it/s, v_loss=1.13, v_acc=0.648]


[ Valid | 087/300 ] loss = 1.13432, acc = 0.64772
[ Valid | 087/300 ] loss = 1.13432, acc = 0.64772


T: 088/300: 100%|█| 78/78 [00:07<00:00, 10.28it/s, lr=0.000378, b_loss=0.644, b_


[ Train | 088/300 ] loss = 0.70031, acc = 0.76276


V: 088/300: 100%|████| 26/26 [00:01<00:00, 13.19it/s, v_loss=0.845, v_acc=0.718]


[ Valid | 088/300 ] loss = 0.84470, acc = 0.71784
[ Valid | 088/300 ] loss = 0.84470, acc = 0.71784


T: 089/300: 100%|█| 78/78 [00:07<00:00, 10.38it/s, lr=0.000374, b_loss=0.732, b_


[ Train | 089/300 ] loss = 0.69403, acc = 0.76279


V: 089/300: 100%|███████| 26/26 [00:02<00:00, 12.16it/s, v_loss=1.2, v_acc=0.63]


[ Valid | 089/300 ] loss = 1.20342, acc = 0.63001
[ Valid | 089/300 ] loss = 1.20342, acc = 0.63001


T: 090/300: 100%|█| 78/78 [00:07<00:00, 10.14it/s, lr=0.000371, b_loss=0.796, b_


[ Train | 090/300 ] loss = 0.70326, acc = 0.75529


V: 090/300: 100%|████| 26/26 [00:02<00:00, 12.29it/s, v_loss=0.995, v_acc=0.682]


[ Valid | 090/300 ] loss = 0.99459, acc = 0.68233
[ Valid | 090/300 ] loss = 0.99459, acc = 0.68233


T: 091/300: 100%|█| 78/78 [00:07<00:00, 10.07it/s, lr=0.000366, b_loss=0.918, b_


[ Train | 091/300 ] loss = 0.68491, acc = 0.76545


V: 091/300: 100%|████| 26/26 [00:02<00:00, 12.13it/s, v_loss=0.912, v_acc=0.712]


[ Valid | 091/300 ] loss = 0.91154, acc = 0.71181
[ Valid | 091/300 ] loss = 0.91154, acc = 0.71181


T: 092/300: 100%|█| 78/78 [00:07<00:00, 10.30it/s, lr=0.000362, b_loss=0.653, b_


[ Train | 092/300 ] loss = 0.67776, acc = 0.76857


V: 092/300: 100%|████| 26/26 [00:02<00:00, 12.92it/s, v_loss=0.971, v_acc=0.692]


[ Valid | 092/300 ] loss = 0.97130, acc = 0.69200
[ Valid | 092/300 ] loss = 0.97130, acc = 0.69200


T: 093/300: 100%|█| 78/78 [00:07<00:00, 10.15it/s, lr=0.000357, b_loss=0.678, b_


[ Train | 093/300 ] loss = 0.66231, acc = 0.76686


V: 093/300: 100%|█████| 26/26 [00:02<00:00, 12.67it/s, v_loss=1.03, v_acc=0.678]


[ Valid | 093/300 ] loss = 1.03426, acc = 0.67814
[ Valid | 093/300 ] loss = 1.03426, acc = 0.67814


T: 094/300: 100%|█| 78/78 [00:07<00:00, 10.31it/s, lr=0.000352, b_loss=0.593, b_


[ Train | 094/300 ] loss = 0.64824, acc = 0.77742


V: 094/300: 100%|████| 26/26 [00:01<00:00, 14.18it/s, v_loss=0.989, v_acc=0.693]


[ Valid | 094/300 ] loss = 0.98930, acc = 0.69314
[ Valid | 094/300 ] loss = 0.98930, acc = 0.69314


T: 095/300: 100%|█| 78/78 [00:07<00:00, 10.52it/s, lr=0.000347, b_loss=0.655, b_


[ Train | 095/300 ] loss = 0.64688, acc = 0.77747


V: 095/300: 100%|█████| 26/26 [00:02<00:00, 12.72it/s, v_loss=1.05, v_acc=0.677]

[ Valid | 095/300 ] loss = 1.05045, acc = 0.67661
[ Valid | 095/300 ] loss = 1.05045, acc = 0.67661
No improvment 20 consecutive epochs, early stopping





0,1
average_training_loss,█▇▆▆▅▄▄▅▄▄▃▃▃▃▂▄▃▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▂▂▂▂▂▂▂▁
average_validation_loss,█▆▅▆▆▄▃▅▃▄▄▃▂▂▂▄▃▃▂▃▂▂▂▂▂▁▁▁▁▁▁▁▃▂▂▃▃▂▂▂
learning rate,█▆▂▇▆▂▁█▇▇▅▄▃▂▁███▇▇▆▆▅▄▄▃▃▂▂▁▁▁█████▇▇▇
step_training_accuracy,▁▁▂▂▃▄▅▄▄▅▅▅▅▄▆▅▅▅▆▆▅▆▇▆▆▇▆▇█▇▇▇▆▆▆▆▆▇▇▇
step_training_loss,██▇▇▆▅▅▆▅▄▅▃▄▃▃▄▄▃▃▃▄▃▂▃▂▂▃▂▁▂▂▁▃▃▂▃▃▂▂▁
step_validation_accuracy,▁▂▃▁▂▃▅▂▄▄▄▅▆▇▆▄▄▆▄▅▅▅▆▆▆▇▇█▇▇▇▅▄▆▆▆▇▆▆▆
step_validation_loss,██▅█▇▅▄▇▄▅▅▃▂▂▂▅▅▃▇▃▄▄▃▂▃▂▁▁▁▁▂▅▄▃▂▄▂▃▄▂

0,1
average_training_loss,0.64688
average_validation_loss,1.05045
learning rate,0.00035
step_training_accuracy,0.77586
step_training_loss,0.65482
step_validation_accuracy,0.68548
step_validation_loss,1.14971




Starting Fold: 2 ********************************************
cuda:0


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016668470650135228, max=1.0…

T: 001/300: 100%|█| 78/78 [00:07<00:00, 10.38it/s, lr=0.0004, b_loss=1.93, b_acc


[ Train | 001/300 ] loss = 2.11480, acc = 0.25339


V: 001/300: 100%|█████| 26/26 [00:01<00:00, 13.09it/s, v_loss=1.99, v_acc=0.302]


[ Valid | 001/300 ] loss = 1.98533, acc = 0.30185
[ Valid | 001/300 ] loss = 1.98533, acc = 0.30185 -> best
Best model found at fold 2 epoch 1, acc=0.30185, saving model


T: 002/300: 100%|█| 78/78 [00:07<00:00,  9.90it/s, lr=0.000362, b_loss=1.78, b_a


[ Train | 002/300 ] loss = 1.93803, acc = 0.32135


V: 002/300: 100%|█████| 26/26 [00:01<00:00, 13.11it/s, v_loss=1.89, v_acc=0.339]


[ Valid | 002/300 ] loss = 1.89441, acc = 0.33872
[ Valid | 002/300 ] loss = 1.89441, acc = 0.33872 -> best
Best model found at fold 2 epoch 2, acc=0.33872, saving model


T: 003/300: 100%|█| 78/78 [00:07<00:00, 10.15it/s, lr=0.000262, b_loss=1.75, b_a


[ Train | 003/300 ] loss = 1.83085, acc = 0.35640


V: 003/300: 100%|█████| 26/26 [00:01<00:00, 14.06it/s, v_loss=1.81, v_acc=0.373]


[ Valid | 003/300 ] loss = 1.80757, acc = 0.37345
[ Valid | 003/300 ] loss = 1.80757, acc = 0.37345 -> best
Best model found at fold 2 epoch 3, acc=0.37345, saving model


T: 004/300: 100%|█| 78/78 [00:07<00:00,  9.99it/s, lr=0.000138, b_loss=1.9, b_ac


[ Train | 004/300 ] loss = 1.72337, acc = 0.40097


V: 004/300: 100%|███████| 26/26 [00:01<00:00, 13.91it/s, v_loss=1.7, v_acc=0.42]


[ Valid | 004/300 ] loss = 1.69959, acc = 0.42035
[ Valid | 004/300 ] loss = 1.69959, acc = 0.42035 -> best
Best model found at fold 2 epoch 4, acc=0.42035, saving model


T: 005/300: 100%|█| 78/78 [00:07<00:00, 10.18it/s, lr=3.82e-5, b_loss=1.66, b_ac


[ Train | 005/300 ] loss = 1.62099, acc = 0.43577


V: 005/300: 100%|█████| 26/26 [00:01<00:00, 13.45it/s, v_loss=1.56, v_acc=0.467]


[ Valid | 005/300 ] loss = 1.55814, acc = 0.46666
[ Valid | 005/300 ] loss = 1.55814, acc = 0.46666 -> best
Best model found at fold 2 epoch 5, acc=0.46666, saving model


T: 006/300: 100%|█| 78/78 [00:07<00:00, 10.06it/s, lr=0.0004, b_loss=1.66, b_acc


[ Train | 006/300 ] loss = 1.76327, acc = 0.37949


V: 006/300: 100%|█████| 26/26 [00:01<00:00, 14.60it/s, v_loss=2.01, v_acc=0.321]


[ Valid | 006/300 ] loss = 2.01156, acc = 0.32131
[ Valid | 006/300 ] loss = 2.01156, acc = 0.32131


T: 007/300: 100%|█| 78/78 [00:07<00:00, 10.30it/s, lr=0.00039, b_loss=1.57, b_ac


[ Train | 007/300 ] loss = 1.67518, acc = 0.41709


V: 007/300: 100%|█████| 26/26 [00:01<00:00, 14.07it/s, v_loss=1.88, v_acc=0.376]


[ Valid | 007/300 ] loss = 1.88356, acc = 0.37609
[ Valid | 007/300 ] loss = 1.88356, acc = 0.37609


T: 008/300: 100%|█| 78/78 [00:07<00:00,  9.99it/s, lr=0.000362, b_loss=1.44, b_a


[ Train | 008/300 ] loss = 1.60691, acc = 0.43978


V: 008/300: 100%|█████| 26/26 [00:01<00:00, 13.99it/s, v_loss=1.72, v_acc=0.437]


[ Valid | 008/300 ] loss = 1.72004, acc = 0.43657
[ Valid | 008/300 ] loss = 1.72004, acc = 0.43657


T: 009/300: 100%|█| 78/78 [00:07<00:00, 10.03it/s, lr=0.000318, b_loss=1.43, b_a


[ Train | 009/300 ] loss = 1.55820, acc = 0.45407


V: 009/300: 100%|█████| 26/26 [00:01<00:00, 13.16it/s, v_loss=1.85, v_acc=0.394]


[ Valid | 009/300 ] loss = 1.84882, acc = 0.39380
[ Valid | 009/300 ] loss = 1.84882, acc = 0.39380


T: 010/300: 100%|█| 78/78 [00:07<00:00,  9.99it/s, lr=0.000262, b_loss=1.48, b_a


[ Train | 010/300 ] loss = 1.49039, acc = 0.48427


V: 010/300: 100%|█████| 26/26 [00:01<00:00, 14.12it/s, v_loss=2.31, v_acc=0.299]


[ Valid | 010/300 ] loss = 2.31091, acc = 0.29904
[ Valid | 010/300 ] loss = 2.31091, acc = 0.29904


T: 011/300: 100%|█| 78/78 [00:07<00:00, 10.14it/s, lr=0.0002, b_loss=1.49, b_acc


[ Train | 011/300 ] loss = 1.42384, acc = 0.50654


V: 011/300: 100%|██████| 26/26 [00:01<00:00, 13.92it/s, v_loss=1.45, v_acc=0.51]


[ Valid | 011/300 ] loss = 1.44748, acc = 0.51031
[ Valid | 011/300 ] loss = 1.44748, acc = 0.51031 -> best
Best model found at fold 2 epoch 11, acc=0.51031, saving model


T: 012/300: 100%|█| 78/78 [00:07<00:00, 10.04it/s, lr=0.000138, b_loss=1.32, b_a


[ Train | 012/300 ] loss = 1.35465, acc = 0.53315


V: 012/300: 100%|█████| 26/26 [00:01<00:00, 13.53it/s, v_loss=1.37, v_acc=0.532]


[ Valid | 012/300 ] loss = 1.36678, acc = 0.53159
[ Valid | 012/300 ] loss = 1.36678, acc = 0.53159 -> best
Best model found at fold 2 epoch 12, acc=0.53159, saving model


T: 013/300: 100%|█| 78/78 [00:07<00:00, 10.13it/s, lr=8.24e-5, b_loss=1.18, b_ac


[ Train | 013/300 ] loss = 1.31056, acc = 0.54991


V: 013/300: 100%|█████| 26/26 [00:01<00:00, 14.10it/s, v_loss=1.31, v_acc=0.554]


[ Valid | 013/300 ] loss = 1.31169, acc = 0.55373
[ Valid | 013/300 ] loss = 1.31169, acc = 0.55373 -> best
Best model found at fold 2 epoch 13, acc=0.55373, saving model


T: 014/300: 100%|█| 78/78 [00:07<00:00,  9.86it/s, lr=3.82e-5, b_loss=1.11, b_ac


[ Train | 014/300 ] loss = 1.25048, acc = 0.56961


V: 014/300: 100%|██████| 26/26 [00:01<00:00, 14.24it/s, v_loss=1.2, v_acc=0.588]


[ Valid | 014/300 ] loss = 1.19682, acc = 0.58813
[ Valid | 014/300 ] loss = 1.19682, acc = 0.58813 -> best
Best model found at fold 2 epoch 14, acc=0.58813, saving model


T: 015/300: 100%|█| 78/78 [00:07<00:00, 10.07it/s, lr=9.79e-6, b_loss=1.19, b_ac


[ Train | 015/300 ] loss = 1.22068, acc = 0.57801


V: 015/300: 100%|█████| 26/26 [00:02<00:00, 12.59it/s, v_loss=1.19, v_acc=0.592]


[ Valid | 015/300 ] loss = 1.18942, acc = 0.59208
[ Valid | 015/300 ] loss = 1.18942, acc = 0.59208 -> best
Best model found at fold 2 epoch 15, acc=0.59208, saving model


T: 016/300: 100%|█| 78/78 [00:08<00:00,  9.61it/s, lr=0.0004, b_loss=1.42, b_acc


[ Train | 016/300 ] loss = 1.44906, acc = 0.50233


V: 016/300: 100%|█████| 26/26 [00:02<00:00, 12.72it/s, v_loss=1.53, v_acc=0.482]


[ Valid | 016/300 ] loss = 1.53235, acc = 0.48228
[ Valid | 016/300 ] loss = 1.53235, acc = 0.48228


T: 017/300: 100%|█| 78/78 [00:07<00:00,  9.95it/s, lr=0.000398, b_loss=1.39, b_a


[ Train | 017/300 ] loss = 1.41840, acc = 0.50661


V: 017/300: 100%|█████| 26/26 [00:01<00:00, 13.38it/s, v_loss=1.65, v_acc=0.461]


[ Valid | 017/300 ] loss = 1.64996, acc = 0.46089
[ Valid | 017/300 ] loss = 1.64996, acc = 0.46089


T: 018/300: 100%|█| 78/78 [00:08<00:00,  9.71it/s, lr=0.00039, b_loss=1.31, b_ac


[ Train | 018/300 ] loss = 1.37039, acc = 0.52765


V: 018/300: 100%|█████| 26/26 [00:01<00:00, 13.23it/s, v_loss=1.37, v_acc=0.534]


[ Valid | 018/300 ] loss = 1.36837, acc = 0.53400
[ Valid | 018/300 ] loss = 1.36837, acc = 0.53400


T: 019/300: 100%|█| 78/78 [00:07<00:00, 10.00it/s, lr=0.000378, b_loss=1.18, b_a


[ Train | 019/300 ] loss = 1.34195, acc = 0.54375


V: 019/300: 100%|█████| 26/26 [00:01<00:00, 13.30it/s, v_loss=1.46, v_acc=0.505]


[ Valid | 019/300 ] loss = 1.45946, acc = 0.50486
[ Valid | 019/300 ] loss = 1.45946, acc = 0.50486


T: 020/300: 100%|█| 78/78 [00:07<00:00, 10.18it/s, lr=0.000362, b_loss=1.44, b_a


[ Train | 020/300 ] loss = 1.28685, acc = 0.55635


V: 020/300: 100%|█████| 26/26 [00:01<00:00, 13.80it/s, v_loss=1.33, v_acc=0.552]


[ Valid | 020/300 ] loss = 1.33200, acc = 0.55212
[ Valid | 020/300 ] loss = 1.33200, acc = 0.55212


T: 021/300: 100%|█| 78/78 [00:07<00:00, 10.09it/s, lr=0.000341, b_loss=1.16, b_a


[ Train | 021/300 ] loss = 1.27680, acc = 0.55776


V: 021/300: 100%|█████| 26/26 [00:01<00:00, 13.69it/s, v_loss=1.42, v_acc=0.519]


[ Valid | 021/300 ] loss = 1.41885, acc = 0.51904
[ Valid | 021/300 ] loss = 1.41885, acc = 0.51904


T: 022/300: 100%|█| 78/78 [00:07<00:00, 10.01it/s, lr=0.000318, b_loss=1.32, b_a


[ Train | 022/300 ] loss = 1.22049, acc = 0.57546


V: 022/300: 100%|████████| 26/26 [00:01<00:00, 13.08it/s, v_loss=1.9, v_acc=0.4]


[ Valid | 022/300 ] loss = 1.89536, acc = 0.40010
[ Valid | 022/300 ] loss = 1.89536, acc = 0.40010


T: 023/300: 100%|█| 78/78 [00:07<00:00, 10.14it/s, lr=0.000291, b_loss=1.12, b_a


[ Train | 023/300 ] loss = 1.17783, acc = 0.59091


V: 023/300: 100%|█████| 26/26 [00:01<00:00, 14.06it/s, v_loss=1.26, v_acc=0.578]


[ Valid | 023/300 ] loss = 1.25940, acc = 0.57784
[ Valid | 023/300 ] loss = 1.25940, acc = 0.57784


T: 024/300: 100%|█| 78/78 [00:07<00:00, 10.19it/s, lr=0.000262, b_loss=1.15, b_a


[ Train | 024/300 ] loss = 1.15423, acc = 0.60006


V: 024/300: 100%|█████| 26/26 [00:01<00:00, 14.13it/s, v_loss=1.38, v_acc=0.549]


[ Valid | 024/300 ] loss = 1.38466, acc = 0.54940
[ Valid | 024/300 ] loss = 1.38466, acc = 0.54940


T: 025/300: 100%|█| 78/78 [00:07<00:00, 10.17it/s, lr=0.000231, b_loss=1.25, b_a


[ Train | 025/300 ] loss = 1.11521, acc = 0.61170


V: 025/300: 100%|█████| 26/26 [00:01<00:00, 13.34it/s, v_loss=1.17, v_acc=0.611]


[ Valid | 025/300 ] loss = 1.16691, acc = 0.61136
[ Valid | 025/300 ] loss = 1.16691, acc = 0.61136 -> best
Best model found at fold 2 epoch 25, acc=0.61136, saving model


T: 026/300: 100%|█| 78/78 [00:07<00:00, 10.05it/s, lr=0.0002, b_loss=1.18, b_acc


[ Train | 026/300 ] loss = 1.08326, acc = 0.62210


V: 026/300: 100%|█████| 26/26 [00:01<00:00, 13.17it/s, v_loss=1.09, v_acc=0.621]


[ Valid | 026/300 ] loss = 1.09352, acc = 0.62105
[ Valid | 026/300 ] loss = 1.09352, acc = 0.62105 -> best
Best model found at fold 2 epoch 26, acc=0.62105, saving model


T: 027/300: 100%|█| 78/78 [00:07<00:00, 10.42it/s, lr=0.000169, b_loss=0.972, b_


[ Train | 027/300 ] loss = 1.05109, acc = 0.63554


V: 027/300: 100%|█████| 26/26 [00:01<00:00, 13.81it/s, v_loss=1.11, v_acc=0.628]


[ Valid | 027/300 ] loss = 1.11298, acc = 0.62755
[ Valid | 027/300 ] loss = 1.11298, acc = 0.62755 -> best
Best model found at fold 2 epoch 27, acc=0.62755, saving model


T: 028/300: 100%|█| 78/78 [00:07<00:00, 10.11it/s, lr=0.000138, b_loss=0.923, b_


[ Train | 028/300 ] loss = 1.01889, acc = 0.65053


V: 028/300: 100%|█████| 26/26 [00:01<00:00, 14.65it/s, v_loss=1.07, v_acc=0.638]


[ Valid | 028/300 ] loss = 1.07231, acc = 0.63776
[ Valid | 028/300 ] loss = 1.07231, acc = 0.63776 -> best
Best model found at fold 2 epoch 28, acc=0.63776, saving model


T: 029/300: 100%|█| 78/78 [00:07<00:00,  9.89it/s, lr=0.000109, b_loss=0.941, b_


[ Train | 029/300 ] loss = 0.99468, acc = 0.65915


V: 029/300: 100%|█████| 26/26 [00:01<00:00, 13.78it/s, v_loss=1.11, v_acc=0.629]


[ Valid | 029/300 ] loss = 1.11465, acc = 0.62932
[ Valid | 029/300 ] loss = 1.11465, acc = 0.62932


T: 030/300: 100%|█| 78/78 [00:07<00:00, 10.26it/s, lr=8.24e-5, b_loss=1.02, b_ac


[ Train | 030/300 ] loss = 0.96601, acc = 0.66627


V: 030/300: 100%|█████| 26/26 [00:01<00:00, 14.13it/s, v_loss=1.02, v_acc=0.653]


[ Valid | 030/300 ] loss = 1.02248, acc = 0.65252
[ Valid | 030/300 ] loss = 1.02248, acc = 0.65252 -> best
Best model found at fold 2 epoch 30, acc=0.65252, saving model


T: 031/300: 100%|█| 78/78 [00:07<00:00, 10.08it/s, lr=5.86e-5, b_loss=1.13, b_ac


[ Train | 031/300 ] loss = 0.93950, acc = 0.68040


V: 031/300: 100%|████| 26/26 [00:02<00:00, 12.90it/s, v_loss=0.954, v_acc=0.677]


[ Valid | 031/300 ] loss = 0.95382, acc = 0.67665
[ Valid | 031/300 ] loss = 0.95382, acc = 0.67665 -> best
Best model found at fold 2 epoch 31, acc=0.67665, saving model


T: 032/300: 100%|█| 78/78 [00:07<00:00, 10.07it/s, lr=3.82e-5, b_loss=0.848, b_a


[ Train | 032/300 ] loss = 0.90421, acc = 0.68880


V: 032/300: 100%|████| 26/26 [00:01<00:00, 14.21it/s, v_loss=0.948, v_acc=0.678]


[ Valid | 032/300 ] loss = 0.94812, acc = 0.67775
[ Valid | 032/300 ] loss = 0.94812, acc = 0.67775 -> best
Best model found at fold 2 epoch 32, acc=0.67775, saving model


T: 033/300: 100%|█| 78/78 [00:07<00:00, 10.25it/s, lr=2.18e-5, b_loss=0.924, b_a


[ Train | 033/300 ] loss = 0.89620, acc = 0.68929


V: 033/300: 100%|█████| 26/26 [00:01<00:00, 14.42it/s, v_loss=0.93, v_acc=0.688]


[ Valid | 033/300 ] loss = 0.92970, acc = 0.68827
[ Valid | 033/300 ] loss = 0.92970, acc = 0.68827 -> best
Best model found at fold 2 epoch 33, acc=0.68827, saving model


T: 034/300: 100%|█| 78/78 [00:07<00:00, 10.42it/s, lr=9.79e-6, b_loss=1.02, b_ac


[ Train | 034/300 ] loss = 0.88502, acc = 0.69597


V: 034/300: 100%|████| 26/26 [00:02<00:00, 12.56it/s, v_loss=0.906, v_acc=0.696]


[ Valid | 034/300 ] loss = 0.90556, acc = 0.69588
[ Valid | 034/300 ] loss = 0.90556, acc = 0.69588 -> best
Best model found at fold 2 epoch 34, acc=0.69588, saving model


T: 035/300: 100%|█| 78/78 [00:07<00:00, 10.33it/s, lr=2.46e-6, b_loss=1.03, b_ac


[ Train | 035/300 ] loss = 0.87245, acc = 0.70355


V: 035/300: 100%|████| 26/26 [00:01<00:00, 14.03it/s, v_loss=0.909, v_acc=0.696]


[ Valid | 035/300 ] loss = 0.90906, acc = 0.69584
[ Valid | 035/300 ] loss = 0.90906, acc = 0.69584


T: 036/300: 100%|█| 78/78 [00:08<00:00,  9.58it/s, lr=0.0004, b_loss=1.5, b_acc=


[ Train | 036/300 ] loss = 1.15711, acc = 0.60158


V: 036/300: 100%|█████| 26/26 [00:01<00:00, 14.88it/s, v_loss=1.42, v_acc=0.533]


[ Valid | 036/300 ] loss = 1.41719, acc = 0.53280
[ Valid | 036/300 ] loss = 1.41719, acc = 0.53280


T: 037/300: 100%|█| 78/78 [00:07<00:00,  9.86it/s, lr=0.000399, b_loss=1.16, b_a


[ Train | 037/300 ] loss = 1.14353, acc = 0.60598


V: 037/300: 100%|█████| 26/26 [00:01<00:00, 15.27it/s, v_loss=1.31, v_acc=0.563]


[ Valid | 037/300 ] loss = 1.30956, acc = 0.56308
[ Valid | 037/300 ] loss = 1.30956, acc = 0.56308


T: 038/300: 100%|█| 78/78 [00:07<00:00, 10.27it/s, lr=0.000398, b_loss=1.11, b_a


[ Train | 038/300 ] loss = 1.10667, acc = 0.62236


V: 038/300: 100%|█████| 26/26 [00:01<00:00, 13.40it/s, v_loss=1.34, v_acc=0.565]


[ Valid | 038/300 ] loss = 1.34037, acc = 0.56532
[ Valid | 038/300 ] loss = 1.34037, acc = 0.56532


T: 039/300: 100%|█| 78/78 [00:07<00:00,  9.83it/s, lr=0.000394, b_loss=1.33, b_a


[ Train | 039/300 ] loss = 1.08554, acc = 0.61622


V: 039/300: 100%|█████| 26/26 [00:01<00:00, 13.12it/s, v_loss=1.22, v_acc=0.591]


[ Valid | 039/300 ] loss = 1.22278, acc = 0.59147
[ Valid | 039/300 ] loss = 1.22278, acc = 0.59147


T: 040/300: 100%|█| 78/78 [00:07<00:00, 10.36it/s, lr=0.00039, b_loss=1.13, b_ac


[ Train | 040/300 ] loss = 1.06151, acc = 0.62988


V: 040/300: 100%|███████| 26/26 [00:01<00:00, 13.19it/s, v_loss=1.55, v_acc=0.5]


[ Valid | 040/300 ] loss = 1.54639, acc = 0.50026
[ Valid | 040/300 ] loss = 1.54639, acc = 0.50026


T: 041/300: 100%|█| 78/78 [00:07<00:00, 10.01it/s, lr=0.000385, b_loss=1.06, b_a


[ Train | 041/300 ] loss = 1.06495, acc = 0.63140


V: 041/300: 100%|█████| 26/26 [00:01<00:00, 14.83it/s, v_loss=1.24, v_acc=0.585]


[ Valid | 041/300 ] loss = 1.23913, acc = 0.58451
[ Valid | 041/300 ] loss = 1.23913, acc = 0.58451


T: 042/300: 100%|█| 78/78 [00:07<00:00, 10.24it/s, lr=0.000378, b_loss=1.15, b_a


[ Train | 042/300 ] loss = 1.02715, acc = 0.65128


V: 042/300: 100%|█████| 26/26 [00:01<00:00, 13.29it/s, v_loss=1.18, v_acc=0.622]


[ Valid | 042/300 ] loss = 1.18120, acc = 0.62247
[ Valid | 042/300 ] loss = 1.18120, acc = 0.62247


T: 043/300: 100%|█| 78/78 [00:07<00:00, 10.19it/s, lr=0.000371, b_loss=1.09, b_a


[ Train | 043/300 ] loss = 1.00360, acc = 0.65191


V: 043/300: 100%|█████| 26/26 [00:01<00:00, 14.11it/s, v_loss=1.16, v_acc=0.617]


[ Valid | 043/300 ] loss = 1.16033, acc = 0.61721
[ Valid | 043/300 ] loss = 1.16033, acc = 0.61721


T: 044/300: 100%|█| 78/78 [00:07<00:00, 10.45it/s, lr=0.000362, b_loss=1, b_acc=


[ Train | 044/300 ] loss = 0.97847, acc = 0.66319


V: 044/300: 100%|█████| 26/26 [00:01<00:00, 14.90it/s, v_loss=1.08, v_acc=0.655]


[ Valid | 044/300 ] loss = 1.08399, acc = 0.65521
[ Valid | 044/300 ] loss = 1.08399, acc = 0.65521


T: 045/300: 100%|█| 78/78 [00:07<00:00, 10.23it/s, lr=0.000352, b_loss=0.952, b_


[ Train | 045/300 ] loss = 0.97528, acc = 0.66457


V: 045/300: 100%|█████| 26/26 [00:01<00:00, 14.20it/s, v_loss=1.21, v_acc=0.606]


[ Valid | 045/300 ] loss = 1.20548, acc = 0.60584
[ Valid | 045/300 ] loss = 1.20548, acc = 0.60584


T: 046/300: 100%|█| 78/78 [00:07<00:00, 10.04it/s, lr=0.000341, b_loss=1.04, b_a


[ Train | 046/300 ] loss = 0.96165, acc = 0.67084


V: 046/300: 100%|██████| 26/26 [00:02<00:00, 12.71it/s, v_loss=1.01, v_acc=0.65]


[ Valid | 046/300 ] loss = 1.01226, acc = 0.65034
[ Valid | 046/300 ] loss = 1.01226, acc = 0.65034


T: 047/300: 100%|█| 78/78 [00:07<00:00, 10.18it/s, lr=0.00033, b_loss=1.13, b_ac


[ Train | 047/300 ] loss = 0.92559, acc = 0.67812


V: 047/300: 100%|████| 26/26 [00:01<00:00, 13.83it/s, v_loss=0.964, v_acc=0.675]


[ Valid | 047/300 ] loss = 0.96412, acc = 0.67503
[ Valid | 047/300 ] loss = 0.96412, acc = 0.67503


T: 048/300: 100%|█| 78/78 [00:07<00:00, 10.04it/s, lr=0.000318, b_loss=0.825, b_


[ Train | 048/300 ] loss = 0.93160, acc = 0.67606


V: 048/300: 100%|█████| 26/26 [00:01<00:00, 14.09it/s, v_loss=1.23, v_acc=0.596]


[ Valid | 048/300 ] loss = 1.22953, acc = 0.59597
[ Valid | 048/300 ] loss = 1.22953, acc = 0.59597


T: 049/300: 100%|█| 78/78 [00:07<00:00, 10.04it/s, lr=0.000304, b_loss=1.06, b_a


[ Train | 049/300 ] loss = 0.91040, acc = 0.68466


V: 049/300: 100%|█████| 26/26 [00:01<00:00, 14.18it/s, v_loss=1.09, v_acc=0.638]


[ Valid | 049/300 ] loss = 1.08942, acc = 0.63838
[ Valid | 049/300 ] loss = 1.08942, acc = 0.63838


T: 050/300: 100%|█| 78/78 [00:07<00:00,  9.97it/s, lr=0.000291, b_loss=0.718, b_


[ Train | 050/300 ] loss = 0.87140, acc = 0.69911


V: 050/300: 100%|█████| 26/26 [00:01<00:00, 15.35it/s, v_loss=1.12, v_acc=0.632]


[ Valid | 050/300 ] loss = 1.12064, acc = 0.63152
[ Valid | 050/300 ] loss = 1.12064, acc = 0.63152


T: 051/300: 100%|█| 78/78 [00:08<00:00,  9.61it/s, lr=0.000277, b_loss=0.673, b_


[ Train | 051/300 ] loss = 0.86413, acc = 0.70082


V: 051/300: 100%|█████| 26/26 [00:01<00:00, 13.62it/s, v_loss=1.05, v_acc=0.651]


[ Valid | 051/300 ] loss = 1.04783, acc = 0.65104
[ Valid | 051/300 ] loss = 1.04783, acc = 0.65104


T: 052/300: 100%|█| 78/78 [00:07<00:00, 10.33it/s, lr=0.000262, b_loss=0.852, b_


[ Train | 052/300 ] loss = 0.85242, acc = 0.70636


V: 052/300: 100%|██████| 26/26 [00:01<00:00, 14.23it/s, v_loss=1.6, v_acc=0.511]


[ Valid | 052/300 ] loss = 1.60330, acc = 0.51081
[ Valid | 052/300 ] loss = 1.60330, acc = 0.51081


T: 053/300: 100%|█| 78/78 [00:07<00:00, 10.12it/s, lr=0.000247, b_loss=0.865, b_


[ Train | 053/300 ] loss = 0.84275, acc = 0.70775


V: 053/300: 100%|█████| 26/26 [00:01<00:00, 14.44it/s, v_loss=1.14, v_acc=0.637]


[ Valid | 053/300 ] loss = 1.13743, acc = 0.63749
[ Valid | 053/300 ] loss = 1.13743, acc = 0.63749


T: 054/300: 100%|█| 78/78 [00:07<00:00, 10.02it/s, lr=0.000231, b_loss=0.83, b_a


[ Train | 054/300 ] loss = 0.81627, acc = 0.71752


V: 054/300: 100%|████| 26/26 [00:02<00:00, 12.98it/s, v_loss=0.984, v_acc=0.679]


[ Valid | 054/300 ] loss = 0.98368, acc = 0.67904
[ Valid | 054/300 ] loss = 0.98368, acc = 0.67904


T: 055/300: 100%|█| 78/78 [00:08<00:00,  9.58it/s, lr=0.000216, b_loss=0.855, b_


[ Train | 055/300 ] loss = 0.80122, acc = 0.72343


V: 055/300: 100%|████| 26/26 [00:01<00:00, 13.64it/s, v_loss=0.976, v_acc=0.679]

[ Valid | 055/300 ] loss = 0.97630, acc = 0.67866
[ Valid | 055/300 ] loss = 0.97630, acc = 0.67866
No improvment 20 consecutive epochs, early stopping





VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_training_loss,█▇▆▅▆▆▅▅▄▄▃▄▄▄▄▄▃▃▃▂▂▂▂▂▁▁▃▃▃▂▂▂▂▂▂▂▁▁▁▁
average_validation_loss,▆▆▅▄▇▆▆█▃▃▂▄▅▄▃▄▃▃▂▂▂▂▁▁▁▁▃▃▃▃▂▂▂▂▃▂▂▄▂▁
learning rate,█▇▆▂██▇▆▃▂▂███▇▇▆▆▅▄▃▂▂▂▁▁█████▇▇▇▇▆▆▆▅▅
step_training_accuracy,▁▁▂▂▃▄▄▄▅▅▅▃▄▆▅▅▅▆▇▆▅▇██▇▅▆▆▇▇▇▇▇▆▇▇▇▇▇▇
step_training_loss,█▇▇▇▆▅▅▅▄▃▃▅▄▃▄▃▃▃▂▃▃▁▁▁▁▃▂▃▂▂▃▁▂▂▂▁▂▂▁▁
step_validation_accuracy,▁▁▃▄▃▅▂▁▅▅▆▅▄▄▅▅▄▆▆▇▇▇█▇▇▆▆▆▄▆▇▇▇█▇▆▆▅▇█
step_validation_loss,▆▅▅▄▄▃▅█▃▃▂▄▄▃▃▃▃▃▂▂▂▂▁▂▂▃▂▃▄▂▂▃▂▁▂▂▂▃▂▂

0,1
average_training_loss,0.80122
average_validation_loss,0.9763
learning rate,0.00022
step_training_accuracy,0.72414
step_training_loss,0.85526
step_validation_accuracy,0.64516
step_validation_loss,0.99806




Starting Fold: 3 ********************************************
cuda:0


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01666843243341039, max=1.0)…

T: 001/300: 100%|█| 78/78 [00:07<00:00, 10.08it/s, lr=0.0004, b_loss=1.89, b_acc


[ Train | 001/300 ] loss = 2.08079, acc = 0.25988


V: 001/300: 100%|█████| 26/26 [00:02<00:00, 12.68it/s, v_loss=2.08, v_acc=0.266]


[ Valid | 001/300 ] loss = 2.08126, acc = 0.26629
[ Valid | 001/300 ] loss = 2.08126, acc = 0.26629 -> best
Best model found at fold 3 epoch 1, acc=0.26629, saving model


T: 002/300: 100%|█| 78/78 [00:07<00:00, 10.05it/s, lr=0.000362, b_loss=1.94, b_a


[ Train | 002/300 ] loss = 1.91012, acc = 0.32267


V: 002/300: 100%|██████| 26/26 [00:02<00:00, 12.83it/s, v_loss=1.9, v_acc=0.329]


[ Valid | 002/300 ] loss = 1.90060, acc = 0.32935
[ Valid | 002/300 ] loss = 1.90060, acc = 0.32935 -> best
Best model found at fold 3 epoch 2, acc=0.32935, saving model


T: 003/300: 100%|█| 78/78 [00:07<00:00, 10.46it/s, lr=0.000262, b_loss=1.79, b_a


[ Train | 003/300 ] loss = 1.80842, acc = 0.35659


V: 003/300: 100%|█████| 26/26 [00:01<00:00, 13.76it/s, v_loss=1.86, v_acc=0.348]


[ Valid | 003/300 ] loss = 1.86165, acc = 0.34766
[ Valid | 003/300 ] loss = 1.86165, acc = 0.34766 -> best
Best model found at fold 3 epoch 3, acc=0.34766, saving model


T: 004/300: 100%|█| 78/78 [00:07<00:00, 10.33it/s, lr=0.000138, b_loss=1.6, b_ac


[ Train | 004/300 ] loss = 1.71246, acc = 0.39889


V: 004/300: 100%|█████| 26/26 [00:01<00:00, 13.38it/s, v_loss=1.65, v_acc=0.436]


[ Valid | 004/300 ] loss = 1.65256, acc = 0.43592
[ Valid | 004/300 ] loss = 1.65256, acc = 0.43592 -> best
Best model found at fold 3 epoch 4, acc=0.43592, saving model


T: 005/300: 100%|█| 78/78 [00:07<00:00,  9.96it/s, lr=3.82e-5, b_loss=1.66, b_ac


[ Train | 005/300 ] loss = 1.62166, acc = 0.43090


V: 005/300: 100%|█████| 26/26 [00:01<00:00, 13.92it/s, v_loss=1.55, v_acc=0.468]


[ Valid | 005/300 ] loss = 1.55436, acc = 0.46806
[ Valid | 005/300 ] loss = 1.55436, acc = 0.46806 -> best
Best model found at fold 3 epoch 5, acc=0.46806, saving model


T: 006/300: 100%|█| 78/78 [00:07<00:00, 10.16it/s, lr=0.0004, b_loss=1.65, b_acc


[ Train | 006/300 ] loss = 1.75933, acc = 0.38809


V: 006/300: 100%|█████| 26/26 [00:01<00:00, 14.32it/s, v_loss=1.95, v_acc=0.349]


[ Valid | 006/300 ] loss = 1.95394, acc = 0.34899
[ Valid | 006/300 ] loss = 1.95394, acc = 0.34899


T: 007/300: 100%|█| 78/78 [00:07<00:00, 10.26it/s, lr=0.00039, b_loss=1.54, b_ac


[ Train | 007/300 ] loss = 1.67730, acc = 0.41562


V: 007/300: 100%|██████| 26/26 [00:02<00:00, 12.76it/s, v_loss=1.7, v_acc=0.419]


[ Valid | 007/300 ] loss = 1.70098, acc = 0.41868
[ Valid | 007/300 ] loss = 1.70098, acc = 0.41868


T: 008/300: 100%|█| 78/78 [00:07<00:00, 10.17it/s, lr=0.000362, b_loss=1.49, b_a


[ Train | 008/300 ] loss = 1.60746, acc = 0.43723


V: 008/300: 100%|█████| 26/26 [00:02<00:00, 12.95it/s, v_loss=1.66, v_acc=0.429]


[ Valid | 008/300 ] loss = 1.66089, acc = 0.42867
[ Valid | 008/300 ] loss = 1.66089, acc = 0.42867


T: 009/300: 100%|█| 78/78 [00:07<00:00, 10.05it/s, lr=0.000318, b_loss=1.51, b_a


[ Train | 009/300 ] loss = 1.53950, acc = 0.46249


V: 009/300: 100%|██████| 26/26 [00:02<00:00, 12.35it/s, v_loss=1.6, v_acc=0.455]


[ Valid | 009/300 ] loss = 1.60014, acc = 0.45511
[ Valid | 009/300 ] loss = 1.60014, acc = 0.45511


T: 010/300: 100%|█| 78/78 [00:07<00:00, 10.17it/s, lr=0.000262, b_loss=1.34, b_a


[ Train | 010/300 ] loss = 1.45403, acc = 0.49214


V: 010/300: 100%|██████| 26/26 [00:01<00:00, 13.63it/s, v_loss=1.5, v_acc=0.493]


[ Valid | 010/300 ] loss = 1.50160, acc = 0.49311
[ Valid | 010/300 ] loss = 1.50160, acc = 0.49311 -> best
Best model found at fold 3 epoch 10, acc=0.49311, saving model


T: 011/300: 100%|█| 78/78 [00:07<00:00, 10.24it/s, lr=0.0002, b_loss=1.3, b_acc=


[ Train | 011/300 ] loss = 1.41951, acc = 0.50694


V: 011/300: 100%|███████| 26/26 [00:02<00:00, 12.78it/s, v_loss=1.45, v_acc=0.5]


[ Valid | 011/300 ] loss = 1.45150, acc = 0.50026
[ Valid | 011/300 ] loss = 1.45150, acc = 0.50026 -> best
Best model found at fold 3 epoch 11, acc=0.50026, saving model


T: 012/300: 100%|█| 78/78 [00:07<00:00, 10.08it/s, lr=0.000138, b_loss=1.35, b_a


[ Train | 012/300 ] loss = 1.34871, acc = 0.52819


V: 012/300: 100%|█████| 26/26 [00:02<00:00, 12.69it/s, v_loss=1.52, v_acc=0.489]


[ Valid | 012/300 ] loss = 1.51562, acc = 0.48944
[ Valid | 012/300 ] loss = 1.51562, acc = 0.48944


T: 013/300: 100%|█| 78/78 [00:07<00:00,  9.93it/s, lr=8.24e-5, b_loss=1.17, b_ac


[ Train | 013/300 ] loss = 1.28880, acc = 0.55021


V: 013/300: 100%|██████| 26/26 [00:01<00:00, 13.56it/s, v_loss=1.29, v_acc=0.56]


[ Valid | 013/300 ] loss = 1.28960, acc = 0.55993
[ Valid | 013/300 ] loss = 1.28960, acc = 0.55993 -> best
Best model found at fold 3 epoch 13, acc=0.55993, saving model


T: 014/300: 100%|█| 78/78 [00:07<00:00, 10.21it/s, lr=3.82e-5, b_loss=1.1, b_acc


[ Train | 014/300 ] loss = 1.23141, acc = 0.57487


V: 014/300: 100%|███████| 26/26 [00:02<00:00, 12.94it/s, v_loss=1.2, v_acc=0.59]


[ Valid | 014/300 ] loss = 1.20203, acc = 0.59016
[ Valid | 014/300 ] loss = 1.20203, acc = 0.59016 -> best
Best model found at fold 3 epoch 14, acc=0.59016, saving model


T: 015/300: 100%|█| 78/78 [00:07<00:00,  9.91it/s, lr=9.79e-6, b_loss=1.36, b_ac


[ Train | 015/300 ] loss = 1.20377, acc = 0.58123


V: 015/300: 100%|███████| 26/26 [00:01<00:00, 13.64it/s, v_loss=1.2, v_acc=0.59]


[ Valid | 015/300 ] loss = 1.20168, acc = 0.59030
[ Valid | 015/300 ] loss = 1.20168, acc = 0.59030 -> best
Best model found at fold 3 epoch 15, acc=0.59030, saving model


T: 016/300: 100%|█| 78/78 [00:07<00:00,  9.93it/s, lr=0.0004, b_loss=1.56, b_acc


[ Train | 016/300 ] loss = 1.45693, acc = 0.49666


V: 016/300: 100%|█████| 26/26 [00:01<00:00, 13.84it/s, v_loss=2.35, v_acc=0.311]


[ Valid | 016/300 ] loss = 2.34942, acc = 0.31102
[ Valid | 016/300 ] loss = 2.34942, acc = 0.31102


T: 017/300: 100%|█| 78/78 [00:07<00:00,  9.85it/s, lr=0.000398, b_loss=1.45, b_a


[ Train | 017/300 ] loss = 1.39643, acc = 0.51874


V: 017/300: 100%|█████| 26/26 [00:02<00:00, 12.37it/s, v_loss=1.59, v_acc=0.466]


[ Valid | 017/300 ] loss = 1.58723, acc = 0.46550
[ Valid | 017/300 ] loss = 1.58723, acc = 0.46550


T: 018/300: 100%|█| 78/78 [00:07<00:00, 10.14it/s, lr=0.00039, b_loss=1.37, b_ac


[ Train | 018/300 ] loss = 1.36336, acc = 0.52297


V: 018/300: 100%|██████| 26/26 [00:01<00:00, 14.36it/s, v_loss=1.49, v_acc=0.49]


[ Valid | 018/300 ] loss = 1.49443, acc = 0.49008
[ Valid | 018/300 ] loss = 1.49443, acc = 0.49008


T: 019/300: 100%|█| 78/78 [00:07<00:00,  9.81it/s, lr=0.000378, b_loss=1.3, b_ac


[ Train | 019/300 ] loss = 1.32044, acc = 0.54895


V: 019/300: 100%|█████| 26/26 [00:01<00:00, 13.68it/s, v_loss=1.47, v_acc=0.518]


[ Valid | 019/300 ] loss = 1.47489, acc = 0.51790
[ Valid | 019/300 ] loss = 1.47489, acc = 0.51790


T: 020/300: 100%|█| 78/78 [00:07<00:00, 10.16it/s, lr=0.000362, b_loss=1.27, b_a


[ Train | 020/300 ] loss = 1.27653, acc = 0.55270


V: 020/300: 100%|█████| 26/26 [00:02<00:00, 12.44it/s, v_loss=1.62, v_acc=0.472]


[ Valid | 020/300 ] loss = 1.62185, acc = 0.47203
[ Valid | 020/300 ] loss = 1.62185, acc = 0.47203


T: 021/300: 100%|█| 78/78 [00:08<00:00,  9.67it/s, lr=0.000341, b_loss=1.21, b_a


[ Train | 021/300 ] loss = 1.25543, acc = 0.55649


V: 021/300: 100%|███████| 26/26 [00:02<00:00, 11.17it/s, v_loss=1.47, v_acc=0.5]


[ Valid | 021/300 ] loss = 1.47066, acc = 0.49980
[ Valid | 021/300 ] loss = 1.47066, acc = 0.49980


T: 022/300: 100%|█| 78/78 [00:07<00:00,  9.80it/s, lr=0.000318, b_loss=1.21, b_a


[ Train | 022/300 ] loss = 1.20579, acc = 0.58006


V: 022/300: 100%|██████| 26/26 [00:02<00:00, 11.67it/s, v_loss=1.37, v_acc=0.54]


[ Valid | 022/300 ] loss = 1.37329, acc = 0.54006
[ Valid | 022/300 ] loss = 1.37329, acc = 0.54006


T: 023/300: 100%|█| 78/78 [00:08<00:00,  9.59it/s, lr=0.000291, b_loss=1.08, b_a


[ Train | 023/300 ] loss = 1.19326, acc = 0.58672


V: 023/300: 100%|█████| 26/26 [00:01<00:00, 13.45it/s, v_loss=1.31, v_acc=0.553]


[ Valid | 023/300 ] loss = 1.30588, acc = 0.55317
[ Valid | 023/300 ] loss = 1.30588, acc = 0.55317


T: 024/300: 100%|█| 78/78 [00:08<00:00,  9.52it/s, lr=0.000262, b_loss=1.06, b_a


[ Train | 024/300 ] loss = 1.14358, acc = 0.60626


V: 024/300: 100%|█████| 26/26 [00:02<00:00, 12.64it/s, v_loss=1.14, v_acc=0.612]


[ Valid | 024/300 ] loss = 1.13742, acc = 0.61168
[ Valid | 024/300 ] loss = 1.13742, acc = 0.61168 -> best
Best model found at fold 3 epoch 24, acc=0.61168, saving model


T: 025/300: 100%|█| 78/78 [00:07<00:00,  9.79it/s, lr=0.000231, b_loss=1.14, b_a


[ Train | 025/300 ] loss = 1.10997, acc = 0.61734


V: 025/300: 100%|█████| 26/26 [00:01<00:00, 13.86it/s, v_loss=1.24, v_acc=0.586]


[ Valid | 025/300 ] loss = 1.23543, acc = 0.58623
[ Valid | 025/300 ] loss = 1.23543, acc = 0.58623


T: 026/300: 100%|█| 78/78 [00:07<00:00,  9.97it/s, lr=0.0002, b_loss=1.1, b_acc=


[ Train | 026/300 ] loss = 1.08129, acc = 0.62617


V: 026/300: 100%|█████| 26/26 [00:02<00:00, 12.28it/s, v_loss=1.12, v_acc=0.623]


[ Valid | 026/300 ] loss = 1.12044, acc = 0.62300
[ Valid | 026/300 ] loss = 1.12044, acc = 0.62300 -> best
Best model found at fold 3 epoch 26, acc=0.62300, saving model


T: 027/300: 100%|█| 78/78 [00:07<00:00, 10.20it/s, lr=0.000169, b_loss=0.829, b_


[ Train | 027/300 ] loss = 1.03178, acc = 0.64106


V: 027/300: 100%|█████| 26/26 [00:01<00:00, 13.95it/s, v_loss=1.11, v_acc=0.631]


[ Valid | 027/300 ] loss = 1.11473, acc = 0.63057
[ Valid | 027/300 ] loss = 1.11473, acc = 0.63057 -> best
Best model found at fold 3 epoch 27, acc=0.63057, saving model


T: 028/300: 100%|█| 78/78 [00:07<00:00, 10.06it/s, lr=0.000138, b_loss=1.12, b_a


[ Train | 028/300 ] loss = 1.01388, acc = 0.64711


V: 028/300: 100%|██████| 26/26 [00:02<00:00, 12.64it/s, v_loss=1.06, v_acc=0.64]


[ Valid | 028/300 ] loss = 1.06328, acc = 0.64017
[ Valid | 028/300 ] loss = 1.06328, acc = 0.64017 -> best
Best model found at fold 3 epoch 28, acc=0.64017, saving model


T: 029/300: 100%|█| 78/78 [00:07<00:00, 10.00it/s, lr=0.000109, b_loss=0.987, b_


[ Train | 029/300 ] loss = 0.98103, acc = 0.65854


V: 029/300: 100%|████| 26/26 [00:01<00:00, 14.22it/s, v_loss=0.989, v_acc=0.666]


[ Valid | 029/300 ] loss = 0.98871, acc = 0.66606
[ Valid | 029/300 ] loss = 0.98871, acc = 0.66606 -> best
Best model found at fold 3 epoch 29, acc=0.66606, saving model


T: 030/300: 100%|█| 78/78 [00:07<00:00, 10.11it/s, lr=8.24e-5, b_loss=1.08, b_ac


[ Train | 030/300 ] loss = 0.96454, acc = 0.66935


V: 030/300: 100%|████| 26/26 [00:01<00:00, 13.48it/s, v_loss=0.992, v_acc=0.666]


[ Valid | 030/300 ] loss = 0.99180, acc = 0.66604
[ Valid | 030/300 ] loss = 0.99180, acc = 0.66604


T: 031/300: 100%|█| 78/78 [00:07<00:00, 10.08it/s, lr=5.86e-5, b_loss=0.791, b_a


[ Train | 031/300 ] loss = 0.92735, acc = 0.67822


V: 031/300: 100%|█████| 26/26 [00:01<00:00, 13.80it/s, v_loss=0.998, v_acc=0.66]


[ Valid | 031/300 ] loss = 0.99838, acc = 0.66042
[ Valid | 031/300 ] loss = 0.99838, acc = 0.66042


T: 032/300: 100%|█| 78/78 [00:07<00:00, 10.12it/s, lr=3.82e-5, b_loss=0.871, b_a


[ Train | 032/300 ] loss = 0.92284, acc = 0.67992


V: 032/300: 100%|█████| 26/26 [00:02<00:00, 12.77it/s, v_loss=0.93, v_acc=0.684]


[ Valid | 032/300 ] loss = 0.93018, acc = 0.68387
[ Valid | 032/300 ] loss = 0.93018, acc = 0.68387 -> best
Best model found at fold 3 epoch 32, acc=0.68387, saving model


T: 033/300: 100%|█| 78/78 [00:07<00:00, 10.11it/s, lr=2.18e-5, b_loss=0.953, b_a


[ Train | 033/300 ] loss = 0.89596, acc = 0.68934


V: 033/300: 100%|█████| 26/26 [00:02<00:00, 12.35it/s, v_loss=0.924, v_acc=0.68]


[ Valid | 033/300 ] loss = 0.92382, acc = 0.67959
[ Valid | 033/300 ] loss = 0.92382, acc = 0.67959


T: 034/300: 100%|█| 78/78 [00:07<00:00, 10.02it/s, lr=9.79e-6, b_loss=0.817, b_a


[ Train | 034/300 ] loss = 0.88174, acc = 0.69601


V: 034/300: 100%|█████| 26/26 [00:02<00:00, 11.86it/s, v_loss=0.904, v_acc=0.69]


[ Valid | 034/300 ] loss = 0.90386, acc = 0.69013
[ Valid | 034/300 ] loss = 0.90386, acc = 0.69013 -> best
Best model found at fold 3 epoch 34, acc=0.69013, saving model


T: 035/300: 100%|█| 78/78 [00:07<00:00,  9.78it/s, lr=2.46e-6, b_loss=0.883, b_a


[ Train | 035/300 ] loss = 0.88062, acc = 0.69694


V: 035/300: 100%|█████| 26/26 [00:01<00:00, 13.85it/s, v_loss=0.902, v_acc=0.69]


[ Valid | 035/300 ] loss = 0.90171, acc = 0.69018
[ Valid | 035/300 ] loss = 0.90171, acc = 0.69018 -> best
Best model found at fold 3 epoch 35, acc=0.69018, saving model


T: 036/300: 100%|█| 78/78 [00:07<00:00, 10.02it/s, lr=0.0004, b_loss=0.904, b_ac


[ Train | 036/300 ] loss = 1.14132, acc = 0.60136


V: 036/300: 100%|█████| 26/26 [00:01<00:00, 13.42it/s, v_loss=1.31, v_acc=0.557]


[ Valid | 036/300 ] loss = 1.31068, acc = 0.55680
[ Valid | 036/300 ] loss = 1.31068, acc = 0.55680


T: 037/300: 100%|█| 78/78 [00:07<00:00, 10.38it/s, lr=0.000399, b_loss=1.05, b_a


[ Train | 037/300 ] loss = 1.11960, acc = 0.61305


V: 037/300: 100%|█████| 26/26 [00:02<00:00, 12.80it/s, v_loss=1.25, v_acc=0.576]


[ Valid | 037/300 ] loss = 1.25073, acc = 0.57646
[ Valid | 037/300 ] loss = 1.25073, acc = 0.57646


T: 038/300: 100%|█| 78/78 [00:07<00:00, 10.12it/s, lr=0.000398, b_loss=1.1, b_ac


[ Train | 038/300 ] loss = 1.10326, acc = 0.62240


V: 038/300: 100%|█████| 26/26 [00:01<00:00, 13.10it/s, v_loss=1.17, v_acc=0.603]


[ Valid | 038/300 ] loss = 1.17326, acc = 0.60262
[ Valid | 038/300 ] loss = 1.17326, acc = 0.60262


T: 039/300: 100%|█| 78/78 [00:07<00:00, 10.37it/s, lr=0.000394, b_loss=1.17, b_a


[ Train | 039/300 ] loss = 1.06428, acc = 0.63169


V: 039/300: 100%|█████| 26/26 [00:01<00:00, 13.50it/s, v_loss=1.25, v_acc=0.593]


[ Valid | 039/300 ] loss = 1.25035, acc = 0.59296
[ Valid | 039/300 ] loss = 1.25035, acc = 0.59296


T: 040/300: 100%|█| 78/78 [00:07<00:00, 10.21it/s, lr=0.00039, b_loss=1.3, b_acc


[ Train | 040/300 ] loss = 1.07588, acc = 0.63209


V: 040/300: 100%|█████| 26/26 [00:02<00:00, 12.90it/s, v_loss=1.41, v_acc=0.538]


[ Valid | 040/300 ] loss = 1.40975, acc = 0.53844
[ Valid | 040/300 ] loss = 1.40975, acc = 0.53844


T: 041/300: 100%|█| 78/78 [00:07<00:00, 10.01it/s, lr=0.000385, b_loss=0.982, b_


[ Train | 041/300 ] loss = 1.03223, acc = 0.64614


V: 041/300: 100%|██████| 26/26 [00:01<00:00, 13.94it/s, v_loss=1.3, v_acc=0.572]


[ Valid | 041/300 ] loss = 1.30190, acc = 0.57230
[ Valid | 041/300 ] loss = 1.30190, acc = 0.57230


T: 042/300: 100%|█| 78/78 [00:07<00:00, 10.05it/s, lr=0.000378, b_loss=0.997, b_


[ Train | 042/300 ] loss = 1.03361, acc = 0.64664


V: 042/300: 100%|██████| 26/26 [00:02<00:00, 12.84it/s, v_loss=1.1, v_acc=0.647]


[ Valid | 042/300 ] loss = 1.10243, acc = 0.64668
[ Valid | 042/300 ] loss = 1.10243, acc = 0.64668


T: 043/300: 100%|█| 78/78 [00:07<00:00,  9.97it/s, lr=0.000371, b_loss=1.11, b_a


[ Train | 043/300 ] loss = 0.99649, acc = 0.65852


V: 043/300: 100%|██████| 26/26 [00:01<00:00, 14.16it/s, v_loss=1.2, v_acc=0.605]


[ Valid | 043/300 ] loss = 1.19879, acc = 0.60505
[ Valid | 043/300 ] loss = 1.19879, acc = 0.60505


T: 044/300: 100%|█| 78/78 [00:07<00:00,  9.75it/s, lr=0.000362, b_loss=0.986, b_


[ Train | 044/300 ] loss = 0.97539, acc = 0.65972


V: 044/300: 100%|██████| 26/26 [00:02<00:00, 12.82it/s, v_loss=1.2, v_acc=0.612]


[ Valid | 044/300 ] loss = 1.20498, acc = 0.61184
[ Valid | 044/300 ] loss = 1.20498, acc = 0.61184


T: 045/300: 100%|█| 78/78 [00:07<00:00, 10.36it/s, lr=0.000352, b_loss=1.01, b_a


[ Train | 045/300 ] loss = 0.95988, acc = 0.67095


V: 045/300: 100%|██████| 26/26 [00:01<00:00, 14.89it/s, v_loss=1.13, v_acc=0.62]


[ Valid | 045/300 ] loss = 1.13491, acc = 0.62035
[ Valid | 045/300 ] loss = 1.13491, acc = 0.62035


T: 046/300: 100%|█| 78/78 [00:07<00:00, 10.10it/s, lr=0.000341, b_loss=0.977, b_


[ Train | 046/300 ] loss = 0.93718, acc = 0.67514


V: 046/300: 100%|█████| 26/26 [00:02<00:00, 11.95it/s, v_loss=1.09, v_acc=0.638]


[ Valid | 046/300 ] loss = 1.08621, acc = 0.63804
[ Valid | 046/300 ] loss = 1.08621, acc = 0.63804


T: 047/300: 100%|█| 78/78 [00:07<00:00, 10.21it/s, lr=0.00033, b_loss=0.982, b_a


[ Train | 047/300 ] loss = 0.92144, acc = 0.68230


V: 047/300: 100%|██████| 26/26 [00:01<00:00, 13.07it/s, v_loss=1.12, v_acc=0.64]


[ Valid | 047/300 ] loss = 1.12208, acc = 0.63996
[ Valid | 047/300 ] loss = 1.12208, acc = 0.63996


T: 048/300: 100%|█| 78/78 [00:07<00:00,  9.91it/s, lr=0.000318, b_loss=1.14, b_a


[ Train | 048/300 ] loss = 0.92929, acc = 0.68169


V: 048/300: 100%|█████| 26/26 [00:01<00:00, 13.54it/s, v_loss=1.18, v_acc=0.618]


[ Valid | 048/300 ] loss = 1.18297, acc = 0.61797
[ Valid | 048/300 ] loss = 1.18297, acc = 0.61797


T: 049/300: 100%|█| 78/78 [00:07<00:00, 10.34it/s, lr=0.000304, b_loss=0.882, b_


[ Train | 049/300 ] loss = 0.91007, acc = 0.68500


V: 049/300: 100%|█████| 26/26 [00:01<00:00, 14.37it/s, v_loss=1.56, v_acc=0.517]


[ Valid | 049/300 ] loss = 1.56223, acc = 0.51652
[ Valid | 049/300 ] loss = 1.56223, acc = 0.51652


T: 050/300: 100%|█| 78/78 [00:07<00:00, 10.41it/s, lr=0.000291, b_loss=0.792, b_


[ Train | 050/300 ] loss = 0.87054, acc = 0.69997


V: 050/300: 100%|█████| 26/26 [00:01<00:00, 14.53it/s, v_loss=1.06, v_acc=0.642]


[ Valid | 050/300 ] loss = 1.06454, acc = 0.64176
[ Valid | 050/300 ] loss = 1.06454, acc = 0.64176


T: 051/300: 100%|█| 78/78 [00:07<00:00, 10.28it/s, lr=0.000277, b_loss=0.799, b_


[ Train | 051/300 ] loss = 0.85395, acc = 0.70237


V: 051/300: 100%|█████| 26/26 [00:01<00:00, 13.25it/s, v_loss=1.19, v_acc=0.618]


[ Valid | 051/300 ] loss = 1.19059, acc = 0.61830
[ Valid | 051/300 ] loss = 1.19059, acc = 0.61830


T: 052/300: 100%|█| 78/78 [00:07<00:00,  9.98it/s, lr=0.000262, b_loss=0.705, b_


[ Train | 052/300 ] loss = 0.85693, acc = 0.70130


V: 052/300: 100%|█████| 26/26 [00:01<00:00, 13.99it/s, v_loss=1.08, v_acc=0.643]


[ Valid | 052/300 ] loss = 1.07505, acc = 0.64325
[ Valid | 052/300 ] loss = 1.07505, acc = 0.64325


T: 053/300: 100%|█| 78/78 [00:07<00:00, 10.17it/s, lr=0.000247, b_loss=0.794, b_


[ Train | 053/300 ] loss = 0.83970, acc = 0.70780


V: 053/300: 100%|████| 26/26 [00:01<00:00, 13.36it/s, v_loss=0.993, v_acc=0.663]


[ Valid | 053/300 ] loss = 0.99251, acc = 0.66301
[ Valid | 053/300 ] loss = 0.99251, acc = 0.66301


T: 054/300: 100%|█| 78/78 [00:07<00:00,  9.95it/s, lr=0.000231, b_loss=0.775, b_


[ Train | 054/300 ] loss = 0.81796, acc = 0.71684


V: 054/300: 100%|█████| 26/26 [00:01<00:00, 14.40it/s, v_loss=1.01, v_acc=0.662]


[ Valid | 054/300 ] loss = 1.00912, acc = 0.66243
[ Valid | 054/300 ] loss = 1.00912, acc = 0.66243


T: 055/300: 100%|█| 78/78 [00:07<00:00, 10.33it/s, lr=0.000216, b_loss=0.912, b_


[ Train | 055/300 ] loss = 0.80549, acc = 0.72323


V: 055/300: 100%|█████| 26/26 [00:02<00:00, 12.70it/s, v_loss=0.94, v_acc=0.686]


[ Valid | 055/300 ] loss = 0.93960, acc = 0.68594
[ Valid | 055/300 ] loss = 0.93960, acc = 0.68594


T: 056/300: 100%|█| 78/78 [00:07<00:00, 10.14it/s, lr=0.0002, b_loss=0.731, b_ac


[ Train | 056/300 ] loss = 0.77469, acc = 0.73056


V: 056/300: 100%|████| 26/26 [00:02<00:00, 12.16it/s, v_loss=0.951, v_acc=0.689]

[ Valid | 056/300 ] loss = 0.95120, acc = 0.68889
[ Valid | 056/300 ] loss = 0.95120, acc = 0.68889
No improvment 20 consecutive epochs, early stopping





VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_training_loss,█▇▇▆▆▅▅▅▄▄▃▅▄▄▄▃▃▃▃▂▂▂▂▂▂▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁
average_validation_loss,▇▆▆▄▆▅▄▄▄▃▂█▄▄▄▃▃▂▂▂▁▁▁▁▁▃▃▃▃▃▂▂▂▂▂▂▂▁▂▁
learning rate,█▇▆▂█▇▇▆▃▂▁███▇▇▆▆▄▄▃▂▂▁▁█████▇▇▇▇▇▆▆▅▅▄
step_training_accuracy,▁▁▂▄▃▃▅▅▃▇▆▅▆▆▅▅▅▆▆▇▇▇█▇▆▅▆▆▇▇▆▇▇█▇█▇▇██
step_training_loss,▇█▇▆▆▆▄▄▅▃▄▅▄▄▄▄▄▄▂▂▃▂▂▂▃▄▃▃▃▂▃▂▂▁▂▁▂▂▁▁
step_validation_accuracy,▂▃▄▄▄▃▅▅▄▆▇▁▄▄▅▇▅▆▆▇█▇▇▇█▆▆▇▄▇▅▆▇▆▄█▇▆█▆
step_validation_loss,▆▆▅▄▄▄▃▃▄▃▂█▄▄▄▂▄▃▂▂▁▂▁▂▁▃▃▂▄▂▃▂▂▃▄▁▁▂▁▂

0,1
average_training_loss,0.77469
average_validation_loss,0.9512
learning rate,0.0002
step_training_accuracy,0.74138
step_training_loss,0.73109
step_validation_accuracy,0.65323
step_validation_loss,1.01214




Starting Fold: 4 ********************************************
cuda:0


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01666862770022514, max=1.0)…

T: 001/300: 100%|█| 78/78 [00:07<00:00, 10.35it/s, lr=0.0004, b_loss=1.94, b_acc


[ Train | 001/300 ] loss = 2.10839, acc = 0.26291


V: 001/300: 100%|█████| 26/26 [00:02<00:00, 12.37it/s, v_loss=2.07, v_acc=0.276]


[ Valid | 001/300 ] loss = 2.07448, acc = 0.27619
[ Valid | 001/300 ] loss = 2.07448, acc = 0.27619 -> best
Best model found at fold 4 epoch 1, acc=0.27619, saving model


T: 002/300: 100%|█| 78/78 [00:07<00:00, 10.40it/s, lr=0.000362, b_loss=2.03, b_a


[ Train | 002/300 ] loss = 1.94263, acc = 0.31492


V: 002/300: 100%|█████| 26/26 [00:01<00:00, 13.77it/s, v_loss=1.91, v_acc=0.333]


[ Valid | 002/300 ] loss = 1.91110, acc = 0.33336
[ Valid | 002/300 ] loss = 1.91110, acc = 0.33336 -> best
Best model found at fold 4 epoch 2, acc=0.33336, saving model


T: 003/300: 100%|█| 78/78 [00:07<00:00, 10.11it/s, lr=0.000262, b_loss=1.67, b_a


[ Train | 003/300 ] loss = 1.83166, acc = 0.36434


V: 003/300: 100%|█████| 26/26 [00:02<00:00, 12.55it/s, v_loss=1.85, v_acc=0.342]


[ Valid | 003/300 ] loss = 1.85391, acc = 0.34168
[ Valid | 003/300 ] loss = 1.85391, acc = 0.34168 -> best
Best model found at fold 4 epoch 3, acc=0.34168, saving model


T: 004/300: 100%|█| 78/78 [00:07<00:00, 10.08it/s, lr=0.000138, b_loss=1.66, b_a


[ Train | 004/300 ] loss = 1.73859, acc = 0.38995


V: 004/300: 100%|█████| 26/26 [00:02<00:00, 12.55it/s, v_loss=1.75, v_acc=0.382]


[ Valid | 004/300 ] loss = 1.75403, acc = 0.38241
[ Valid | 004/300 ] loss = 1.75403, acc = 0.38241 -> best
Best model found at fold 4 epoch 4, acc=0.38241, saving model


T: 005/300: 100%|█| 78/78 [00:07<00:00, 10.02it/s, lr=3.82e-5, b_loss=1.49, b_ac


[ Train | 005/300 ] loss = 1.63717, acc = 0.42859


V: 005/300: 100%|█████| 26/26 [00:02<00:00, 11.36it/s, v_loss=1.57, v_acc=0.462]


[ Valid | 005/300 ] loss = 1.56871, acc = 0.46152
[ Valid | 005/300 ] loss = 1.56871, acc = 0.46152 -> best
Best model found at fold 4 epoch 5, acc=0.46152, saving model


T: 006/300: 100%|█| 78/78 [00:07<00:00, 10.19it/s, lr=0.0004, b_loss=1.73, b_acc


[ Train | 006/300 ] loss = 1.78312, acc = 0.37987


V: 006/300: 100%|█████| 26/26 [00:01<00:00, 13.39it/s, v_loss=1.87, v_acc=0.371]


[ Valid | 006/300 ] loss = 1.87283, acc = 0.37125
[ Valid | 006/300 ] loss = 1.87283, acc = 0.37125


T: 007/300: 100%|█| 78/78 [00:07<00:00, 10.14it/s, lr=0.00039, b_loss=1.84, b_ac


[ Train | 007/300 ] loss = 1.68353, acc = 0.41586


V: 007/300: 100%|█████| 26/26 [00:01<00:00, 13.21it/s, v_loss=1.69, v_acc=0.412]


[ Valid | 007/300 ] loss = 1.69162, acc = 0.41237
[ Valid | 007/300 ] loss = 1.69162, acc = 0.41237


T: 008/300: 100%|█| 78/78 [00:07<00:00, 10.08it/s, lr=0.000362, b_loss=1.49, b_a


[ Train | 008/300 ] loss = 1.61455, acc = 0.44020


V: 008/300: 100%|█████| 26/26 [00:02<00:00, 12.51it/s, v_loss=1.71, v_acc=0.409]


[ Valid | 008/300 ] loss = 1.70834, acc = 0.40923
[ Valid | 008/300 ] loss = 1.70834, acc = 0.40923


T: 009/300: 100%|█| 78/78 [00:07<00:00,  9.96it/s, lr=0.000318, b_loss=1.45, b_a


[ Train | 009/300 ] loss = 1.53968, acc = 0.46540


V: 009/300: 100%|█████| 26/26 [00:02<00:00, 12.38it/s, v_loss=1.76, v_acc=0.401]


[ Valid | 009/300 ] loss = 1.76280, acc = 0.40074
[ Valid | 009/300 ] loss = 1.76280, acc = 0.40074


T: 010/300: 100%|█| 78/78 [00:07<00:00, 10.15it/s, lr=0.000262, b_loss=1.58, b_a


[ Train | 010/300 ] loss = 1.48315, acc = 0.48293


V: 010/300: 100%|████████| 26/26 [00:02<00:00, 11.68it/s, v_loss=2, v_acc=0.358]


[ Valid | 010/300 ] loss = 1.99694, acc = 0.35765
[ Valid | 010/300 ] loss = 1.99694, acc = 0.35765


T: 011/300: 100%|█| 78/78 [00:07<00:00, 10.20it/s, lr=0.0002, b_loss=1.48, b_acc


[ Train | 011/300 ] loss = 1.39152, acc = 0.51546


V: 011/300: 100%|█████| 26/26 [00:02<00:00, 12.31it/s, v_loss=1.42, v_acc=0.504]


[ Valid | 011/300 ] loss = 1.42215, acc = 0.50357
[ Valid | 011/300 ] loss = 1.42215, acc = 0.50357 -> best
Best model found at fold 4 epoch 11, acc=0.50357, saving model


T: 012/300: 100%|█| 78/78 [00:07<00:00, 10.41it/s, lr=0.000138, b_loss=1.35, b_a


[ Train | 012/300 ] loss = 1.34355, acc = 0.53326


V: 012/300: 100%|█████| 26/26 [00:02<00:00, 12.52it/s, v_loss=1.31, v_acc=0.559]


[ Valid | 012/300 ] loss = 1.30507, acc = 0.55900
[ Valid | 012/300 ] loss = 1.30507, acc = 0.55900 -> best
Best model found at fold 4 epoch 12, acc=0.55900, saving model


T: 013/300: 100%|█| 78/78 [00:07<00:00, 10.30it/s, lr=8.24e-5, b_loss=1.29, b_ac


[ Train | 013/300 ] loss = 1.27793, acc = 0.55211


V: 013/300: 100%|█████| 26/26 [00:01<00:00, 13.91it/s, v_loss=1.37, v_acc=0.543]


[ Valid | 013/300 ] loss = 1.37032, acc = 0.54279
[ Valid | 013/300 ] loss = 1.37032, acc = 0.54279


T: 014/300: 100%|█| 78/78 [00:07<00:00, 10.21it/s, lr=3.82e-5, b_loss=1.21, b_ac


[ Train | 014/300 ] loss = 1.21766, acc = 0.57926


V: 014/300: 100%|█████| 26/26 [00:02<00:00, 12.78it/s, v_loss=1.19, v_acc=0.599]


[ Valid | 014/300 ] loss = 1.18697, acc = 0.59904
[ Valid | 014/300 ] loss = 1.18697, acc = 0.59904 -> best
Best model found at fold 4 epoch 14, acc=0.59904, saving model


T: 015/300: 100%|█| 78/78 [00:07<00:00, 10.12it/s, lr=9.79e-6, b_loss=1.27, b_ac


[ Train | 015/300 ] loss = 1.18329, acc = 0.58862


V: 015/300: 100%|█████| 26/26 [00:02<00:00, 12.61it/s, v_loss=1.17, v_acc=0.601]


[ Valid | 015/300 ] loss = 1.16758, acc = 0.60074
[ Valid | 015/300 ] loss = 1.16758, acc = 0.60074 -> best
Best model found at fold 4 epoch 15, acc=0.60074, saving model


T: 016/300: 100%|█| 78/78 [00:07<00:00, 10.18it/s, lr=0.0004, b_loss=1.43, b_acc


[ Train | 016/300 ] loss = 1.43095, acc = 0.50333


V: 016/300: 100%|█████| 26/26 [00:02<00:00, 11.93it/s, v_loss=1.93, v_acc=0.368]


[ Valid | 016/300 ] loss = 1.92550, acc = 0.36821
[ Valid | 016/300 ] loss = 1.92550, acc = 0.36821


T: 017/300: 100%|█| 78/78 [00:07<00:00, 10.09it/s, lr=0.000398, b_loss=1.5, b_ac


[ Train | 017/300 ] loss = 1.38920, acc = 0.51423


V: 017/300: 100%|█████| 26/26 [00:02<00:00, 12.23it/s, v_loss=1.88, v_acc=0.388]


[ Valid | 017/300 ] loss = 1.87514, acc = 0.38757
[ Valid | 017/300 ] loss = 1.87514, acc = 0.38757


T: 018/300: 100%|█| 78/78 [00:07<00:00, 10.08it/s, lr=0.00039, b_loss=1.17, b_ac


[ Train | 018/300 ] loss = 1.34075, acc = 0.54001


V: 018/300: 100%|█████| 26/26 [00:01<00:00, 13.10it/s, v_loss=1.42, v_acc=0.519]


[ Valid | 018/300 ] loss = 1.41899, acc = 0.51930
[ Valid | 018/300 ] loss = 1.41899, acc = 0.51930


T: 019/300: 100%|█| 78/78 [00:07<00:00, 10.20it/s, lr=0.000378, b_loss=1.36, b_a


[ Train | 019/300 ] loss = 1.30588, acc = 0.54884


V: 019/300: 100%|█████| 26/26 [00:01<00:00, 13.01it/s, v_loss=1.79, v_acc=0.454]


[ Valid | 019/300 ] loss = 1.78910, acc = 0.45376
[ Valid | 019/300 ] loss = 1.78910, acc = 0.45376


T: 020/300: 100%|█| 78/78 [00:07<00:00, 10.03it/s, lr=0.000362, b_loss=1.22, b_a


[ Train | 020/300 ] loss = 1.27977, acc = 0.55766


V: 020/300: 100%|█████| 26/26 [00:02<00:00, 11.97it/s, v_loss=1.39, v_acc=0.528]


[ Valid | 020/300 ] loss = 1.39230, acc = 0.52797
[ Valid | 020/300 ] loss = 1.39230, acc = 0.52797


T: 021/300: 100%|█| 78/78 [00:07<00:00, 10.23it/s, lr=0.000341, b_loss=1.07, b_a


[ Train | 021/300 ] loss = 1.22047, acc = 0.57378


V: 021/300: 100%|█████| 26/26 [00:02<00:00, 11.88it/s, v_loss=1.51, v_acc=0.513]


[ Valid | 021/300 ] loss = 1.50689, acc = 0.51334
[ Valid | 021/300 ] loss = 1.50689, acc = 0.51334


T: 022/300: 100%|█| 78/78 [00:07<00:00, 10.04it/s, lr=0.000318, b_loss=1.16, b_a


[ Train | 022/300 ] loss = 1.19247, acc = 0.58657


V: 022/300: 100%|█████| 26/26 [00:02<00:00, 11.10it/s, v_loss=1.25, v_acc=0.582]


[ Valid | 022/300 ] loss = 1.24900, acc = 0.58152
[ Valid | 022/300 ] loss = 1.24900, acc = 0.58152


T: 023/300: 100%|█| 78/78 [00:07<00:00, 10.28it/s, lr=0.000291, b_loss=1.14, b_a


[ Train | 023/300 ] loss = 1.16463, acc = 0.59854


V: 023/300: 100%|██████| 26/26 [00:02<00:00, 12.44it/s, v_loss=1.6, v_acc=0.476]


[ Valid | 023/300 ] loss = 1.59793, acc = 0.47621
[ Valid | 023/300 ] loss = 1.59793, acc = 0.47621


T: 024/300: 100%|█| 78/78 [00:07<00:00, 10.20it/s, lr=0.000262, b_loss=1.14, b_a


[ Train | 024/300 ] loss = 1.12327, acc = 0.61149


V: 024/300: 100%|███████| 26/26 [00:02<00:00, 12.52it/s, v_loss=1.22, v_acc=0.6]


[ Valid | 024/300 ] loss = 1.22116, acc = 0.60019
[ Valid | 024/300 ] loss = 1.22116, acc = 0.60019


T: 025/300: 100%|█| 78/78 [00:07<00:00, 10.10it/s, lr=0.000231, b_loss=0.965, b_


[ Train | 025/300 ] loss = 1.09282, acc = 0.62533


V: 025/300: 100%|█████| 26/26 [00:02<00:00, 12.76it/s, v_loss=1.27, v_acc=0.581]


[ Valid | 025/300 ] loss = 1.27407, acc = 0.58129
[ Valid | 025/300 ] loss = 1.27407, acc = 0.58129


T: 026/300: 100%|█| 78/78 [00:07<00:00, 10.22it/s, lr=0.0002, b_loss=1.16, b_acc


[ Train | 026/300 ] loss = 1.06382, acc = 0.63256


V: 026/300: 100%|█████| 26/26 [00:01<00:00, 13.21it/s, v_loss=1.19, v_acc=0.595]


[ Valid | 026/300 ] loss = 1.18508, acc = 0.59480
[ Valid | 026/300 ] loss = 1.18508, acc = 0.59480


T: 027/300: 100%|█| 78/78 [00:07<00:00, 10.08it/s, lr=0.000169, b_loss=0.988, b_


[ Train | 027/300 ] loss = 1.02184, acc = 0.64654


V: 027/300: 100%|█████| 26/26 [00:02<00:00, 11.92it/s, v_loss=1.12, v_acc=0.614]


[ Valid | 027/300 ] loss = 1.12495, acc = 0.61383
[ Valid | 027/300 ] loss = 1.12495, acc = 0.61383 -> best
Best model found at fold 4 epoch 27, acc=0.61383, saving model


T: 028/300: 100%|█| 78/78 [00:07<00:00, 10.21it/s, lr=0.000138, b_loss=1.03, b_a


[ Train | 028/300 ] loss = 1.00827, acc = 0.64633


V: 028/300: 100%|█████| 26/26 [00:01<00:00, 13.05it/s, v_loss=1.12, v_acc=0.633]


[ Valid | 028/300 ] loss = 1.12433, acc = 0.63301
[ Valid | 028/300 ] loss = 1.12433, acc = 0.63301 -> best
Best model found at fold 4 epoch 28, acc=0.63301, saving model


T: 029/300: 100%|█| 78/78 [00:07<00:00,  9.90it/s, lr=0.000109, b_loss=0.979, b_


[ Train | 029/300 ] loss = 0.96758, acc = 0.67078


V: 029/300: 100%|█████| 26/26 [00:02<00:00, 12.70it/s, v_loss=1.21, v_acc=0.604]


[ Valid | 029/300 ] loss = 1.20723, acc = 0.60447
[ Valid | 029/300 ] loss = 1.20723, acc = 0.60447


T: 030/300: 100%|█| 78/78 [00:07<00:00, 10.07it/s, lr=8.24e-5, b_loss=1.07, b_ac


[ Train | 030/300 ] loss = 0.95288, acc = 0.66744


V: 030/300: 100%|█████| 26/26 [00:02<00:00, 11.56it/s, v_loss=1.03, v_acc=0.654]


[ Valid | 030/300 ] loss = 1.02733, acc = 0.65438
[ Valid | 030/300 ] loss = 1.02733, acc = 0.65438 -> best
Best model found at fold 4 epoch 30, acc=0.65438, saving model


T: 031/300: 100%|█| 78/78 [00:07<00:00, 10.13it/s, lr=5.86e-5, b_loss=0.736, b_a


[ Train | 031/300 ] loss = 0.92273, acc = 0.67675


V: 031/300: 100%|████| 26/26 [00:02<00:00, 12.84it/s, v_loss=0.956, v_acc=0.671]


[ Valid | 031/300 ] loss = 0.95570, acc = 0.67114
[ Valid | 031/300 ] loss = 0.95570, acc = 0.67114 -> best
Best model found at fold 4 epoch 31, acc=0.67114, saving model


T: 032/300: 100%|█| 78/78 [00:07<00:00, 10.16it/s, lr=3.82e-5, b_loss=0.841, b_a


[ Train | 032/300 ] loss = 0.89947, acc = 0.68956


V: 032/300: 100%|████| 26/26 [00:02<00:00, 12.62it/s, v_loss=0.922, v_acc=0.694]


[ Valid | 032/300 ] loss = 0.92174, acc = 0.69437
[ Valid | 032/300 ] loss = 0.92174, acc = 0.69437 -> best
Best model found at fold 4 epoch 32, acc=0.69437, saving model


T: 033/300: 100%|█| 78/78 [00:07<00:00, 10.24it/s, lr=2.18e-5, b_loss=0.809, b_a


[ Train | 033/300 ] loss = 0.88160, acc = 0.69365


V: 033/300: 100%|█████| 26/26 [00:02<00:00, 12.69it/s, v_loss=0.93, v_acc=0.691]


[ Valid | 033/300 ] loss = 0.93048, acc = 0.69105
[ Valid | 033/300 ] loss = 0.93048, acc = 0.69105


T: 034/300: 100%|█| 78/78 [00:07<00:00, 10.16it/s, lr=9.79e-6, b_loss=0.837, b_a


[ Train | 034/300 ] loss = 0.85470, acc = 0.70461


V: 034/300: 100%|████| 26/26 [00:02<00:00, 12.56it/s, v_loss=0.914, v_acc=0.695]


[ Valid | 034/300 ] loss = 0.91436, acc = 0.69460
[ Valid | 034/300 ] loss = 0.91436, acc = 0.69460 -> best
Best model found at fold 4 epoch 34, acc=0.69460, saving model


T: 035/300: 100%|█| 78/78 [00:07<00:00, 10.21it/s, lr=2.46e-6, b_loss=0.793, b_a


[ Train | 035/300 ] loss = 0.86867, acc = 0.69669


V: 035/300: 100%|████| 26/26 [00:02<00:00, 12.60it/s, v_loss=0.913, v_acc=0.693]


[ Valid | 035/300 ] loss = 0.91288, acc = 0.69288
[ Valid | 035/300 ] loss = 0.91288, acc = 0.69288


T: 036/300: 100%|█| 78/78 [00:07<00:00, 10.03it/s, lr=0.0004, b_loss=1.15, b_acc


[ Train | 036/300 ] loss = 1.11716, acc = 0.61542


V: 036/300: 100%|█████| 26/26 [00:01<00:00, 13.58it/s, v_loss=1.33, v_acc=0.557]


[ Valid | 036/300 ] loss = 1.32511, acc = 0.55743
[ Valid | 036/300 ] loss = 1.32511, acc = 0.55743


T: 037/300: 100%|█| 78/78 [00:07<00:00, 10.23it/s, lr=0.000399, b_loss=0.963, b_


[ Train | 037/300 ] loss = 1.10259, acc = 0.61429


V: 037/300: 100%|███████| 26/26 [00:02<00:00, 12.65it/s, v_loss=1.4, v_acc=0.53]


[ Valid | 037/300 ] loss = 1.39712, acc = 0.53040
[ Valid | 037/300 ] loss = 1.39712, acc = 0.53040


T: 038/300: 100%|█| 78/78 [00:07<00:00, 10.18it/s, lr=0.000398, b_loss=1.02, b_a


[ Train | 038/300 ] loss = 1.09812, acc = 0.62521


V: 038/300: 100%|█████| 26/26 [00:02<00:00, 11.37it/s, v_loss=1.19, v_acc=0.608]


[ Valid | 038/300 ] loss = 1.18574, acc = 0.60827
[ Valid | 038/300 ] loss = 1.18574, acc = 0.60827


T: 039/300: 100%|█| 78/78 [00:07<00:00, 10.17it/s, lr=0.000394, b_loss=0.82, b_a


[ Train | 039/300 ] loss = 1.06158, acc = 0.63227


V: 039/300: 100%|█████| 26/26 [00:01<00:00, 13.26it/s, v_loss=1.14, v_acc=0.615]


[ Valid | 039/300 ] loss = 1.13930, acc = 0.61497
[ Valid | 039/300 ] loss = 1.13930, acc = 0.61497


T: 040/300: 100%|█| 78/78 [00:07<00:00,  9.89it/s, lr=0.00039, b_loss=1.11, b_ac


[ Train | 040/300 ] loss = 1.03826, acc = 0.63645


V: 040/300: 100%|█████| 26/26 [00:02<00:00, 12.98it/s, v_loss=1.34, v_acc=0.582]


[ Valid | 040/300 ] loss = 1.34422, acc = 0.58245
[ Valid | 040/300 ] loss = 1.34422, acc = 0.58245


T: 041/300: 100%|█| 78/78 [00:07<00:00, 10.40it/s, lr=0.000385, b_loss=1.07, b_a


[ Train | 041/300 ] loss = 1.02440, acc = 0.64738


V: 041/300: 100%|█████| 26/26 [00:02<00:00, 12.95it/s, v_loss=1.17, v_acc=0.606]


[ Valid | 041/300 ] loss = 1.17166, acc = 0.60562
[ Valid | 041/300 ] loss = 1.17166, acc = 0.60562


T: 042/300: 100%|█| 78/78 [00:07<00:00, 10.08it/s, lr=0.000378, b_loss=0.953, b_


[ Train | 042/300 ] loss = 1.00185, acc = 0.65434


V: 042/300: 100%|█████| 26/26 [00:02<00:00, 11.63it/s, v_loss=1.17, v_acc=0.613]


[ Valid | 042/300 ] loss = 1.17384, acc = 0.61310
[ Valid | 042/300 ] loss = 1.17384, acc = 0.61310


T: 043/300: 100%|█| 78/78 [00:07<00:00, 10.10it/s, lr=0.000371, b_loss=0.956, b_


[ Train | 043/300 ] loss = 0.96983, acc = 0.66279


V: 043/300: 100%|█████| 26/26 [00:01<00:00, 13.01it/s, v_loss=1.11, v_acc=0.638]


[ Valid | 043/300 ] loss = 1.10832, acc = 0.63751
[ Valid | 043/300 ] loss = 1.10832, acc = 0.63751


T: 044/300: 100%|█| 78/78 [00:07<00:00, 10.01it/s, lr=0.000362, b_loss=0.868, b_


[ Train | 044/300 ] loss = 0.97908, acc = 0.66186


V: 044/300: 100%|██████| 26/26 [00:02<00:00, 12.61it/s, v_loss=1.1, v_acc=0.637]


[ Valid | 044/300 ] loss = 1.09824, acc = 0.63680
[ Valid | 044/300 ] loss = 1.09824, acc = 0.63680


T: 045/300: 100%|█| 78/78 [00:07<00:00,  9.84it/s, lr=0.000352, b_loss=0.838, b_


[ Train | 045/300 ] loss = 0.93413, acc = 0.67771


V: 045/300: 100%|█████| 26/26 [00:02<00:00, 12.01it/s, v_loss=1.04, v_acc=0.651]


[ Valid | 045/300 ] loss = 1.04270, acc = 0.65098
[ Valid | 045/300 ] loss = 1.04270, acc = 0.65098


T: 046/300: 100%|█| 78/78 [00:07<00:00,  9.80it/s, lr=0.000341, b_loss=1, b_acc=


[ Train | 046/300 ] loss = 0.92961, acc = 0.67628


V: 046/300: 100%|█████| 26/26 [00:02<00:00, 12.71it/s, v_loss=1.01, v_acc=0.667]


[ Valid | 046/300 ] loss = 1.01219, acc = 0.66690
[ Valid | 046/300 ] loss = 1.01219, acc = 0.66690


T: 047/300: 100%|█| 78/78 [00:07<00:00, 10.22it/s, lr=0.00033, b_loss=1.05, b_ac


[ Train | 047/300 ] loss = 0.90137, acc = 0.68905


V: 047/300: 100%|█████| 26/26 [00:01<00:00, 13.61it/s, v_loss=1.06, v_acc=0.648]


[ Valid | 047/300 ] loss = 1.06394, acc = 0.64842
[ Valid | 047/300 ] loss = 1.06394, acc = 0.64842


T: 048/300: 100%|█| 78/78 [00:07<00:00, 10.26it/s, lr=0.000318, b_loss=0.931, b_


[ Train | 048/300 ] loss = 0.88831, acc = 0.69711


V: 048/300: 100%|█████| 26/26 [00:02<00:00, 12.59it/s, v_loss=1.01, v_acc=0.661]


[ Valid | 048/300 ] loss = 1.01230, acc = 0.66145
[ Valid | 048/300 ] loss = 1.01230, acc = 0.66145


T: 049/300: 100%|█| 78/78 [00:07<00:00,  9.98it/s, lr=0.000304, b_loss=0.824, b_


[ Train | 049/300 ] loss = 0.87276, acc = 0.69724


V: 049/300: 100%|█████| 26/26 [00:01<00:00, 13.23it/s, v_loss=1.07, v_acc=0.638]


[ Valid | 049/300 ] loss = 1.07440, acc = 0.63776
[ Valid | 049/300 ] loss = 1.07440, acc = 0.63776


T: 050/300: 100%|█| 78/78 [00:08<00:00,  9.61it/s, lr=0.000291, b_loss=0.825, b_


[ Train | 050/300 ] loss = 0.87461, acc = 0.69479


V: 050/300: 100%|█████| 26/26 [00:02<00:00, 12.59it/s, v_loss=1.05, v_acc=0.658]


[ Valid | 050/300 ] loss = 1.04547, acc = 0.65789
[ Valid | 050/300 ] loss = 1.04547, acc = 0.65789


T: 051/300: 100%|█| 78/78 [00:07<00:00, 10.14it/s, lr=0.000277, b_loss=1.03, b_a


[ Train | 051/300 ] loss = 0.84359, acc = 0.70656


V: 051/300: 100%|█████| 26/26 [00:02<00:00, 12.66it/s, v_loss=1.26, v_acc=0.611]


[ Valid | 051/300 ] loss = 1.25613, acc = 0.61132
[ Valid | 051/300 ] loss = 1.25613, acc = 0.61132


T: 052/300: 100%|█| 78/78 [00:07<00:00, 10.06it/s, lr=0.000262, b_loss=0.849, b_


[ Train | 052/300 ] loss = 0.81788, acc = 0.71185


V: 052/300: 100%|█████| 26/26 [00:02<00:00, 12.97it/s, v_loss=1.07, v_acc=0.665]


[ Valid | 052/300 ] loss = 1.06673, acc = 0.66480
[ Valid | 052/300 ] loss = 1.06673, acc = 0.66480


T: 053/300: 100%|█| 78/78 [00:07<00:00,  9.93it/s, lr=0.000247, b_loss=0.645, b_


[ Train | 053/300 ] loss = 0.79640, acc = 0.72049


V: 053/300: 100%|█████| 26/26 [00:02<00:00, 11.39it/s, v_loss=1.05, v_acc=0.668]


[ Valid | 053/300 ] loss = 1.04657, acc = 0.66816
[ Valid | 053/300 ] loss = 1.04657, acc = 0.66816


T: 054/300: 100%|█| 78/78 [00:07<00:00, 10.18it/s, lr=0.000231, b_loss=0.936, b_


[ Train | 054/300 ] loss = 0.80774, acc = 0.72047


V: 054/300: 100%|████| 26/26 [00:02<00:00, 11.97it/s, v_loss=0.914, v_acc=0.706]


[ Valid | 054/300 ] loss = 0.91352, acc = 0.70609
[ Valid | 054/300 ] loss = 0.91352, acc = 0.70609 -> best
Best model found at fold 4 epoch 54, acc=0.70609, saving model


T: 055/300: 100%|█| 78/78 [00:07<00:00, 10.01it/s, lr=0.000216, b_loss=0.759, b_


[ Train | 055/300 ] loss = 0.77923, acc = 0.72955


V: 055/300: 100%|████| 26/26 [00:02<00:00, 11.17it/s, v_loss=0.959, v_acc=0.689]


[ Valid | 055/300 ] loss = 0.95913, acc = 0.68870
[ Valid | 055/300 ] loss = 0.95913, acc = 0.68870


T: 056/300: 100%|█| 78/78 [00:07<00:00, 10.11it/s, lr=0.0002, b_loss=0.855, b_ac


[ Train | 056/300 ] loss = 0.75908, acc = 0.73445


V: 056/300: 100%|████| 26/26 [00:02<00:00, 11.70it/s, v_loss=0.913, v_acc=0.699]


[ Valid | 056/300 ] loss = 0.91299, acc = 0.69907
[ Valid | 056/300 ] loss = 0.91299, acc = 0.69907


T: 057/300: 100%|█| 78/78 [00:07<00:00,  9.99it/s, lr=0.000184, b_loss=0.787, b_


[ Train | 057/300 ] loss = 0.74236, acc = 0.73974


V: 057/300: 100%|████| 26/26 [00:02<00:00, 12.31it/s, v_loss=0.983, v_acc=0.694]


[ Valid | 057/300 ] loss = 0.98281, acc = 0.69397
[ Valid | 057/300 ] loss = 0.98281, acc = 0.69397


T: 058/300: 100%|█| 78/78 [00:07<00:00,  9.96it/s, lr=0.000169, b_loss=0.816, b_


[ Train | 058/300 ] loss = 0.71848, acc = 0.74777


V: 058/300: 100%|████| 26/26 [00:02<00:00, 12.03it/s, v_loss=0.863, v_acc=0.727]


[ Valid | 058/300 ] loss = 0.86309, acc = 0.72675
[ Valid | 058/300 ] loss = 0.86309, acc = 0.72675 -> best
Best model found at fold 4 epoch 58, acc=0.72675, saving model


T: 059/300: 100%|█| 78/78 [00:07<00:00, 10.13it/s, lr=0.000153, b_loss=0.937, b_


[ Train | 059/300 ] loss = 0.70239, acc = 0.76014


V: 059/300: 100%|██████| 26/26 [00:02<00:00, 11.51it/s, v_loss=0.942, v_acc=0.7]


[ Valid | 059/300 ] loss = 0.94173, acc = 0.69970
[ Valid | 059/300 ] loss = 0.94173, acc = 0.69970


T: 060/300: 100%|█| 78/78 [00:07<00:00, 10.09it/s, lr=0.000138, b_loss=0.864, b_


[ Train | 060/300 ] loss = 0.70646, acc = 0.75755


V: 060/300: 100%|████| 26/26 [00:02<00:00, 12.07it/s, v_loss=0.885, v_acc=0.711]


[ Valid | 060/300 ] loss = 0.88485, acc = 0.71118
[ Valid | 060/300 ] loss = 0.88485, acc = 0.71118


T: 061/300: 100%|█| 78/78 [00:07<00:00,  9.99it/s, lr=0.000123, b_loss=0.645, b_


[ Train | 061/300 ] loss = 0.68399, acc = 0.75862


V: 061/300: 100%|████| 26/26 [00:02<00:00, 12.83it/s, v_loss=0.835, v_acc=0.736]


[ Valid | 061/300 ] loss = 0.83507, acc = 0.73595
[ Valid | 061/300 ] loss = 0.83507, acc = 0.73595 -> best
Best model found at fold 4 epoch 61, acc=0.73595, saving model


T: 062/300: 100%|█| 78/78 [00:07<00:00,  9.95it/s, lr=0.000109, b_loss=0.652, b_


[ Train | 062/300 ] loss = 0.65696, acc = 0.77208


V: 062/300: 100%|████| 26/26 [00:01<00:00, 13.36it/s, v_loss=0.859, v_acc=0.723]


[ Valid | 062/300 ] loss = 0.85911, acc = 0.72283
[ Valid | 062/300 ] loss = 0.85911, acc = 0.72283


T: 063/300: 100%|█| 78/78 [00:07<00:00, 10.62it/s, lr=9.55e-5, b_loss=0.622, b_a


[ Train | 063/300 ] loss = 0.65014, acc = 0.77897


V: 063/300: 100%|████| 26/26 [00:01<00:00, 14.17it/s, v_loss=0.865, v_acc=0.726]


[ Valid | 063/300 ] loss = 0.86495, acc = 0.72558
[ Valid | 063/300 ] loss = 0.86495, acc = 0.72558


T: 064/300: 100%|█| 78/78 [00:07<00:00, 10.16it/s, lr=8.24e-5, b_loss=0.532, b_a


[ Train | 064/300 ] loss = 0.63198, acc = 0.77929


V: 064/300: 100%|████| 26/26 [00:02<00:00, 12.80it/s, v_loss=0.805, v_acc=0.738]


[ Valid | 064/300 ] loss = 0.80514, acc = 0.73795
[ Valid | 064/300 ] loss = 0.80514, acc = 0.73795 -> best
Best model found at fold 4 epoch 64, acc=0.73795, saving model


T: 065/300: 100%|█| 78/78 [00:07<00:00,  9.90it/s, lr=7.01e-5, b_loss=0.699, b_a


[ Train | 065/300 ] loss = 0.61491, acc = 0.78816


V: 065/300: 100%|████| 26/26 [00:01<00:00, 13.09it/s, v_loss=0.825, v_acc=0.734]


[ Valid | 065/300 ] loss = 0.82502, acc = 0.73441
[ Valid | 065/300 ] loss = 0.82502, acc = 0.73441


T: 066/300: 100%|█| 78/78 [00:07<00:00,  9.77it/s, lr=5.86e-5, b_loss=0.565, b_a


[ Train | 066/300 ] loss = 0.59838, acc = 0.79171


V: 066/300: 100%|████| 26/26 [00:01<00:00, 13.81it/s, v_loss=0.834, v_acc=0.732]


[ Valid | 066/300 ] loss = 0.83430, acc = 0.73198
[ Valid | 066/300 ] loss = 0.83430, acc = 0.73198


T: 067/300: 100%|█| 78/78 [00:07<00:00, 10.24it/s, lr=4.79e-5, b_loss=0.597, b_a


[ Train | 067/300 ] loss = 0.60192, acc = 0.78922


V: 067/300: 100%|████| 26/26 [00:02<00:00, 12.21it/s, v_loss=0.823, v_acc=0.742]


[ Valid | 067/300 ] loss = 0.82312, acc = 0.74160
[ Valid | 067/300 ] loss = 0.82312, acc = 0.74160 -> best
Best model found at fold 4 epoch 67, acc=0.74160, saving model


T: 068/300: 100%|█| 78/78 [00:07<00:00, 10.26it/s, lr=3.82e-5, b_loss=0.565, b_a


[ Train | 068/300 ] loss = 0.57694, acc = 0.80087


V: 068/300: 100%|████| 26/26 [00:02<00:00, 11.86it/s, v_loss=0.791, v_acc=0.751]


[ Valid | 068/300 ] loss = 0.79122, acc = 0.75147
[ Valid | 068/300 ] loss = 0.79122, acc = 0.75147 -> best
Best model found at fold 4 epoch 68, acc=0.75147, saving model


T: 069/300: 100%|█| 78/78 [00:07<00:00, 10.11it/s, lr=2.95e-5, b_loss=0.706, b_a


[ Train | 069/300 ] loss = 0.58129, acc = 0.80064


V: 069/300: 100%|████| 26/26 [00:02<00:00, 11.71it/s, v_loss=0.795, v_acc=0.752]


[ Valid | 069/300 ] loss = 0.79477, acc = 0.75240
[ Valid | 069/300 ] loss = 0.79477, acc = 0.75240 -> best
Best model found at fold 4 epoch 69, acc=0.75240, saving model


T: 070/300: 100%|█| 78/78 [00:07<00:00, 10.34it/s, lr=2.18e-5, b_loss=0.631, b_a


[ Train | 070/300 ] loss = 0.57525, acc = 0.79949


V: 070/300: 100%|████| 26/26 [00:02<00:00, 12.78it/s, v_loss=0.775, v_acc=0.755]


[ Valid | 070/300 ] loss = 0.77534, acc = 0.75540
[ Valid | 070/300 ] loss = 0.77534, acc = 0.75540 -> best
Best model found at fold 4 epoch 70, acc=0.75540, saving model


T: 071/300: 100%|█| 78/78 [00:07<00:00, 10.06it/s, lr=1.52e-5, b_loss=0.589, b_a


[ Train | 071/300 ] loss = 0.56212, acc = 0.80821


V: 071/300: 100%|████| 26/26 [00:02<00:00, 11.62it/s, v_loss=0.773, v_acc=0.756]


[ Valid | 071/300 ] loss = 0.77343, acc = 0.75636
[ Valid | 071/300 ] loss = 0.77343, acc = 0.75636 -> best
Best model found at fold 4 epoch 71, acc=0.75636, saving model


T: 072/300: 100%|█| 78/78 [00:07<00:00, 10.34it/s, lr=9.79e-6, b_loss=0.756, b_a


[ Train | 072/300 ] loss = 0.55240, acc = 0.80805


V: 072/300: 100%|████| 26/26 [00:01<00:00, 14.04it/s, v_loss=0.781, v_acc=0.756]


[ Valid | 072/300 ] loss = 0.78142, acc = 0.75558
[ Valid | 072/300 ] loss = 0.78142, acc = 0.75558


T: 073/300: 100%|█| 78/78 [00:07<00:00, 10.23it/s, lr=5.53e-6, b_loss=0.621, b_a


[ Train | 073/300 ] loss = 0.54413, acc = 0.81453


V: 073/300: 100%|████| 26/26 [00:01<00:00, 13.39it/s, v_loss=0.775, v_acc=0.756]


[ Valid | 073/300 ] loss = 0.77547, acc = 0.75573
[ Valid | 073/300 ] loss = 0.77547, acc = 0.75573


T: 074/300: 100%|█| 78/78 [00:07<00:00, 10.02it/s, lr=2.46e-6, b_loss=0.626, b_a


[ Train | 074/300 ] loss = 0.54274, acc = 0.81736


V: 074/300: 100%|████| 26/26 [00:02<00:00, 12.33it/s, v_loss=0.775, v_acc=0.757]


[ Valid | 074/300 ] loss = 0.77461, acc = 0.75669
[ Valid | 074/300 ] loss = 0.77461, acc = 0.75669 -> best
Best model found at fold 4 epoch 74, acc=0.75669, saving model


T: 075/300: 100%|█| 78/78 [00:07<00:00,  9.85it/s, lr=6.17e-7, b_loss=0.551, b_a


[ Train | 075/300 ] loss = 0.53856, acc = 0.81470


V: 075/300: 100%|████| 26/26 [00:02<00:00, 12.80it/s, v_loss=0.774, v_acc=0.759]


[ Valid | 075/300 ] loss = 0.77440, acc = 0.75930
[ Valid | 075/300 ] loss = 0.77440, acc = 0.75930 -> best
Best model found at fold 4 epoch 75, acc=0.75930, saving model


T: 076/300: 100%|█| 78/78 [00:07<00:00, 10.17it/s, lr=0.0004, b_loss=0.837, b_ac


[ Train | 076/300 ] loss = 0.82066, acc = 0.71520


V: 076/300: 100%|█████| 26/26 [00:02<00:00, 10.71it/s, v_loss=1.04, v_acc=0.657]


[ Valid | 076/300 ] loss = 1.03743, acc = 0.65671
[ Valid | 076/300 ] loss = 1.03743, acc = 0.65671


T: 077/300: 100%|█| 78/78 [00:07<00:00, 10.17it/s, lr=0.0004, b_loss=0.84, b_acc


[ Train | 077/300 ] loss = 0.82733, acc = 0.71364


V: 077/300: 100%|█████| 26/26 [00:02<00:00, 11.31it/s, v_loss=1.13, v_acc=0.636]


[ Valid | 077/300 ] loss = 1.12861, acc = 0.63618
[ Valid | 077/300 ] loss = 1.12861, acc = 0.63618


T: 078/300: 100%|█| 78/78 [00:07<00:00,  9.91it/s, lr=0.000399, b_loss=0.956, b_


[ Train | 078/300 ] loss = 0.81832, acc = 0.71766


V: 078/300: 100%|█████| 26/26 [00:02<00:00, 11.07it/s, v_loss=1.11, v_acc=0.625]


[ Valid | 078/300 ] loss = 1.10777, acc = 0.62460
[ Valid | 078/300 ] loss = 1.10777, acc = 0.62460


T: 079/300: 100%|█| 78/78 [00:07<00:00,  9.83it/s, lr=0.000399, b_loss=0.815, b_


[ Train | 079/300 ] loss = 0.79089, acc = 0.72661


V: 079/300: 100%|█████| 26/26 [00:02<00:00, 12.64it/s, v_loss=1.05, v_acc=0.658]


[ Valid | 079/300 ] loss = 1.05323, acc = 0.65769
[ Valid | 079/300 ] loss = 1.05323, acc = 0.65769


T: 080/300: 100%|█| 78/78 [00:07<00:00,  9.95it/s, lr=0.000398, b_loss=0.796, b_


[ Train | 080/300 ] loss = 0.78705, acc = 0.72892


V: 080/300: 100%|█████| 26/26 [00:02<00:00, 12.13it/s, v_loss=1.01, v_acc=0.676]


[ Valid | 080/300 ] loss = 1.00653, acc = 0.67603
[ Valid | 080/300 ] loss = 1.00653, acc = 0.67603


T: 081/300: 100%|█| 78/78 [00:07<00:00,  9.99it/s, lr=0.000396, b_loss=0.882, b_


[ Train | 081/300 ] loss = 0.77993, acc = 0.72577


V: 081/300: 100%|█████| 26/26 [00:02<00:00, 11.79it/s, v_loss=1.07, v_acc=0.655]


[ Valid | 081/300 ] loss = 1.06678, acc = 0.65491
[ Valid | 081/300 ] loss = 1.06678, acc = 0.65491


T: 082/300: 100%|█| 78/78 [00:07<00:00,  9.81it/s, lr=0.000394, b_loss=0.892, b_


[ Train | 082/300 ] loss = 0.75799, acc = 0.73511


V: 082/300: 100%|██████| 26/26 [00:02<00:00, 11.30it/s, v_loss=1.08, v_acc=0.66]


[ Valid | 082/300 ] loss = 1.08150, acc = 0.65976
[ Valid | 082/300 ] loss = 1.08150, acc = 0.65976


T: 083/300: 100%|█| 78/78 [00:07<00:00,  9.81it/s, lr=0.000392, b_loss=0.654, b_


[ Train | 083/300 ] loss = 0.74222, acc = 0.74145


V: 083/300: 100%|█████| 26/26 [00:02<00:00, 12.67it/s, v_loss=1.01, v_acc=0.695]


[ Valid | 083/300 ] loss = 1.00615, acc = 0.69523
[ Valid | 083/300 ] loss = 1.00615, acc = 0.69523


T: 084/300: 100%|█| 78/78 [00:07<00:00,  9.90it/s, lr=0.00039, b_loss=0.751, b_a


[ Train | 084/300 ] loss = 0.74002, acc = 0.74618


V: 084/300: 100%|████| 26/26 [00:02<00:00, 12.49it/s, v_loss=0.961, v_acc=0.685]


[ Valid | 084/300 ] loss = 0.96090, acc = 0.68496
[ Valid | 084/300 ] loss = 0.96090, acc = 0.68496


T: 085/300: 100%|█| 78/78 [00:07<00:00,  9.98it/s, lr=0.000388, b_loss=0.667, b_


[ Train | 085/300 ] loss = 0.73330, acc = 0.74654


V: 085/300: 100%|█████| 26/26 [00:02<00:00, 12.21it/s, v_loss=1.19, v_acc=0.637]


[ Valid | 085/300 ] loss = 1.19325, acc = 0.63684
[ Valid | 085/300 ] loss = 1.19325, acc = 0.63684


T: 086/300: 100%|█| 78/78 [00:07<00:00, 10.06it/s, lr=0.000385, b_loss=0.848, b_


[ Train | 086/300 ] loss = 0.71593, acc = 0.74815


V: 086/300: 100%|█████| 26/26 [00:02<00:00, 12.75it/s, v_loss=1.18, v_acc=0.648]


[ Valid | 086/300 ] loss = 1.18472, acc = 0.64838
[ Valid | 086/300 ] loss = 1.18472, acc = 0.64838


T: 087/300: 100%|█| 78/78 [00:08<00:00,  9.63it/s, lr=0.000382, b_loss=0.802, b_


[ Train | 087/300 ] loss = 0.72098, acc = 0.75335


V: 087/300: 100%|█████| 26/26 [00:02<00:00, 12.51it/s, v_loss=1.07, v_acc=0.663]


[ Valid | 087/300 ] loss = 1.07328, acc = 0.66338
[ Valid | 087/300 ] loss = 1.07328, acc = 0.66338


T: 088/300: 100%|█| 78/78 [00:07<00:00,  9.90it/s, lr=0.000378, b_loss=0.863, b_


[ Train | 088/300 ] loss = 0.70219, acc = 0.75559


V: 088/300: 100%|████| 26/26 [00:01<00:00, 13.31it/s, v_loss=0.941, v_acc=0.691]


[ Valid | 088/300 ] loss = 0.94064, acc = 0.69069
[ Valid | 088/300 ] loss = 0.94064, acc = 0.69069


T: 089/300: 100%|█| 78/78 [00:07<00:00,  9.86it/s, lr=0.000374, b_loss=0.783, b_


[ Train | 089/300 ] loss = 0.69160, acc = 0.76083


V: 089/300: 100%|█████| 26/26 [00:01<00:00, 13.37it/s, v_loss=1.02, v_acc=0.675]


[ Valid | 089/300 ] loss = 1.01575, acc = 0.67509
[ Valid | 089/300 ] loss = 1.01575, acc = 0.67509


T: 090/300: 100%|█| 78/78 [00:07<00:00, 10.06it/s, lr=0.000371, b_loss=0.711, b_


[ Train | 090/300 ] loss = 0.68906, acc = 0.76181


V: 090/300: 100%|█████| 26/26 [00:02<00:00, 12.44it/s, v_loss=1.14, v_acc=0.649]


[ Valid | 090/300 ] loss = 1.13732, acc = 0.64946
[ Valid | 090/300 ] loss = 1.13732, acc = 0.64946


T: 091/300: 100%|█| 78/78 [00:07<00:00,  9.95it/s, lr=0.000366, b_loss=0.666, b_


[ Train | 091/300 ] loss = 0.69598, acc = 0.75886


V: 091/300: 100%|█████| 26/26 [00:02<00:00, 11.74it/s, v_loss=1.09, v_acc=0.671]


[ Valid | 091/300 ] loss = 1.09239, acc = 0.67061
[ Valid | 091/300 ] loss = 1.09239, acc = 0.67061


T: 092/300: 100%|█| 78/78 [00:07<00:00,  9.82it/s, lr=0.000362, b_loss=0.738, b_


[ Train | 092/300 ] loss = 0.67381, acc = 0.76558


V: 092/300: 100%|██████| 26/26 [00:02<00:00, 12.56it/s, v_loss=0.991, v_acc=0.7]


[ Valid | 092/300 ] loss = 0.99100, acc = 0.70032
[ Valid | 092/300 ] loss = 0.99100, acc = 0.70032


T: 093/300: 100%|█| 78/78 [00:07<00:00, 10.25it/s, lr=0.000357, b_loss=0.801, b_


[ Train | 093/300 ] loss = 0.65963, acc = 0.77462


V: 093/300: 100%|████| 26/26 [00:01<00:00, 13.99it/s, v_loss=0.939, v_acc=0.703]


[ Valid | 093/300 ] loss = 0.93914, acc = 0.70334
[ Valid | 093/300 ] loss = 0.93914, acc = 0.70334


T: 094/300: 100%|█| 78/78 [00:07<00:00, 10.15it/s, lr=0.000352, b_loss=0.671, b_


[ Train | 094/300 ] loss = 0.65293, acc = 0.77866


V: 094/300: 100%|██████| 26/26 [00:01<00:00, 13.08it/s, v_loss=1.08, v_acc=0.66]


[ Valid | 094/300 ] loss = 1.07878, acc = 0.66007
[ Valid | 094/300 ] loss = 1.07878, acc = 0.66007


T: 095/300: 100%|█| 78/78 [00:07<00:00,  9.82it/s, lr=0.000347, b_loss=0.649, b_


[ Train | 095/300 ] loss = 0.64380, acc = 0.77939


V: 095/300: 100%|████| 26/26 [00:02<00:00, 11.88it/s, v_loss=0.894, v_acc=0.717]


[ Valid | 095/300 ] loss = 0.89421, acc = 0.71689
[ Valid | 095/300 ] loss = 0.89421, acc = 0.71689


T: 096/300: 100%|█| 78/78 [00:07<00:00,  9.80it/s, lr=0.000341, b_loss=0.587, b_


[ Train | 096/300 ] loss = 0.63248, acc = 0.78035


V: 096/300: 100%|█████| 26/26 [00:02<00:00, 11.13it/s, v_loss=0.886, v_acc=0.73]

[ Valid | 096/300 ] loss = 0.88637, acc = 0.73044
[ Valid | 096/300 ] loss = 0.88637, acc = 0.73044
No improvment 20 consecutive epochs, early stopping





VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_training_loss,█▇▆▆▅▄▄▅▄▄▃▃▃▃▂▄▃▃▃▃▂▂▂▂▂▂▁▁▁▁▁▂▂▂▂▂▂▂▂▁
average_validation_loss,█▇▅▆█▄▃▄▄▄▄▃▂▂▂▄▃▃▃▃▃▃▂▂▂▁▁▁▁▁▁▂▃▃▂▃▂▃▂▂
learning rate,█▆▂▇▆▂▁█▇▇▅▄▂▂▁███▇▇▆▆▅▄▄▃▂▂▁▁▁██████▇▇▇
step_training_accuracy,▁▂▃▃▃▅▆▅▅▅▆▅▇▇▆▆▆▆▇▇▆▇▇▇▆███▇▇█▆▇█████▇▇
step_training_loss,█▆▆▆▅▅▄▄▄▄▄▄▂▂▃▄▃▃▃▃▃▂▂▂▃▁▁▂▂▂▁▃▂▁▂▂▂▂▂▁
step_validation_accuracy,▂▁▃▂▁▅▆▅▅▄▆▅▆▇▇▄▅▆▇▆▇▇▇▅▇█▇▇██▇▆▇▆▇▇▆▇▇▇
step_validation_loss,▇▆▅▆█▄▃▄▄▅▄▄▃▂▂▅▄▃▂▄▂▂▂▄▂▂▂▃▂▁▂▃▂▃▃▃▃▂▂▃

0,1
average_training_loss,0.63248
average_validation_loss,0.88637
learning rate,0.00034
step_training_accuracy,0.75
step_training_loss,0.58715
step_validation_accuracy,0.72581
step_validation_loss,1.02649


In [12]:
test_dir = "./food-11/test"
test_set = FoodDataset(test_dir, tfm=test_tfm)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=num_cpu, pin_memory=True)

# Testing and generate prediction CSV

In [13]:
device = "cuda" if torch.cuda.is_available() else "cpu"

test_fold = k_fold

models = []
for i in range(test_fold):
    fold = i + 1
    model_best = Classifier(Residual_Block, num_layers).to(device)
    model_best.load_state_dict(torch.load(f"Fold_{fold}_best.ckpt"))
    model_best.eval()
    models.append(model_best)

prediction = []
test_accs = []
true_labels = []
with torch.no_grad():
    for data, labels in test_loader:
        test_preds = [] 
        for model_best in models:
            test_preds.append(model_best(data.to(device)).cpu().data.numpy())
        
        preds = []
        for i in range(test_fold):
            temp_pred = np.argmax(test_preds[i], axis=1).tolist()
            preds.append(temp_pred)
        pred = list(zip(*preds))
        pred = np.array(pred)
        choice = np.apply_along_axis(lambda x: np.bincount(x).argmax(), axis=1, arr=pred)

        test_preds = torch.from_numpy(choice).to(device)
        acc = (test_preds == labels.to(device)).float().mean()
        test_accs.append(acc)
        prediction += choice.squeeze().tolist()
        true_labels.extend(labels.tolist())

test_acc = sum(test_accs) / len(test_accs)
print(f"Test accurary: {test_acc}")

Test accurary: 0.7779452800750732


In [14]:
#create test csv
def pad4(i):
    return "0"*(4-len(str(i)))+str(i)
df = pd.DataFrame()
df["Id"] = [pad4(i) for i in range(1,len(test_set)+1)]
df["Category"] = prediction
df.to_csv("prediction_resnet.csv",index = False)

df = pd.DataFrame()
df["Id"] = [pad4(i) for i in range(1,len(test_set)+1)]
df["Category"] = true_labels
df.to_csv("true_labels.csv",index = False)