In [1]:
import os
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as opt
import torch.nn.functional as F
from pytorch_metric_learning import losses
from torchvision.io import read_image
from torchvision import transforms
from tqdm.notebook import tqdm
from PIL import Image
import optuna
from optuna.trial import TrialState

In [2]:
random.seed(2020)

In [3]:
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=1)

In [4]:
resize_image = transforms.Resize(size=(227, 227))

In [5]:
def calculate_mean(dataset):
    mean = torch.Tensor([0.,0.,0.])
    
    num_samples = len(dataset)
    for i in range(num_samples):
        image, _, _, _ = dataset[i]
        image = image.type(torch.FloatTensor) / 255
        
        for j in range(3):
            mean[j] += torch.mean(image[j, :, :])
            
    mean = mean / num_samples
    return mean

In [6]:
def subtract_mean(image, mean):
    normed_image = torch.zeros(image.size())
    image = image.type(torch.FloatTensor) / 255
    
    for j in range(3):
        normed_image[j] = image[j, :, :] - mean[j]
    return normed_image

In [7]:
class AdienceDataset(Dataset):
    def __init__(self, annot_file, img_dir, train=False, transform=None):
        self.img_lbls = pd.read_csv(annot_file, header=None)
        self.img_dir = img_dir
        self.transform = transform
        self.is_train = train
        self.mean = torch.Tensor([0.,0.,0.])
    
    def __len__(self):
        return len(self.img_lbls)
    
    def __getitem__(self, idx):
        img_file = self.img_lbls.iloc[idx, 0]
        img_path = os.path.join(self.img_dir, img_file)
        image = read_image(img_path)
        # image = resize_image(image)
        label = self.img_lbls.iloc[idx, 1]
    
        if self.is_train:
            positive_list = self.img_lbls[self.img_lbls.iloc[:, 1] == label].index.values
            positive_list = np.setdiff1d(positive_list, np.array([idx]))
            positive_item = random.choice(positive_list)
            positive_img = self.img_lbls.iloc[positive_item, 0]
            pos_img_path = os.path.join(self.img_dir, positive_img)
            pos_image = read_image(pos_img_path)
            
            negative_list = self.img_lbls[self.img_lbls.iloc[:, 1] != label].index.values
            negative_item = random.choice(negative_list)
            negative_img = self.img_lbls.iloc[negative_item, 0]
            neg_img_path = os.path.join(self.img_dir, negative_img)
            neg_image = read_image(neg_img_path)

            if self.transform:
                image = self.transform(image, self.mean)
                pos_image = self.transform(pos_image, self.mean)
                neg_image = self.transform(neg_image, self.mean)
                
            return image, pos_image, neg_image, label
        else:
            if self.transform:
                image = self.transform(image, self.mean)

            return image, label
    
    def set_mean(self, mean):
        self.mean = mean

In [8]:
# train_data = AdienceDataset("../train.csv", "../cropped_Adience/", train=True)
train_data = AdienceDataset("../train.csv", "../cropped_Adience/", train=True, transform=subtract_mean)
test_data = AdienceDataset("../test.csv", "../cropped_Adience/", train=False, transform=subtract_mean)
val_data = AdienceDataset("../val.csv", "../cropped_Adience/", train=True, transform=subtract_mean)

In [9]:
# mean = calculate_mean(train_data)
mean = torch.Tensor([0.5062, 0.3581, 0.3099])
mean

tensor([0.5062, 0.3581, 0.3099])

In [10]:
train_data.set_mean(mean)
test_data.set_mean(mean)
val_data.set_mean(mean)

In [11]:
class TripletLoss(nn.Module):
    def __init__(self, margin=1.0):
        super(TripletLoss, self).__init__()
        self.margin = margin
        
    def calc_euclidean(self, x1, x2):
        return (x1 - x2).pow(2).sum(1)
    
    def forward(self, anchor, positive, negative):
        distance_positive = self.calc_euclidean(anchor, positive)
        distance_negative = self.calc_euclidean(anchor, negative)
        losses = torch.relu(distance_positive - distance_negative + self.margin)

        return losses.mean()

In [12]:
class CombinedLoss(nn.Module):
    def __init__(self, beta=1.0):
        super(CombinedLoss, self).__init__()
        self.beta = beta
        self.triplet = TripletLoss(margin=1.0)
        self.classification = nn.CrossEntropyLoss()
        
    def forward(self, anchor, positive, negative, classification_out, labels):
        triplet_loss = self.triplet(anchor, positive, negative)
        classification_loss = self.classification(classification_out, labels)
        total_loss = (self.beta * triplet_loss) + classification_loss
        
        return total_loss

In [13]:
class Net(nn.Module):
    def __init__(self, num_channels, num_classes):
        super(Net, self).__init__()
        
        self.conv1 = nn.Conv2d(num_channels, 96, kernel_size=7, stride=4)
        self.conv11 = nn.Conv2d(96, 256, kernel_size=1)
        self.conv2 = nn.Conv2d(96, 256, kernel_size=5, padding=2)
        self.conv3 = nn.Conv2d(256, 256, kernel_size=5, padding=2)
        self.conv33 = nn.Conv2d(256, 384, kernel_size=1)
        self.conv4 = nn.Conv2d(256, 384, kernel_size=3, padding=1)
        self.conv5 = nn.Conv2d(384, 384, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=3, stride=2)
        self.norm = nn.LocalResponseNorm(5, alpha=0.0001, beta=0.75)
        
        self.fc6 = nn.Linear(in_features=13824, out_features=512)
        self.fc7 = nn.Linear(in_features=512, out_features=512)
        self.fc8 = nn.Linear(in_features=512, out_features=num_classes)
        
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.5)
        
    def forward(self, x, alpha: float):
        x = self.conv1(x)
        x = self.relu(x)
        x = self.pool(x)
        x = self.norm(x)
        
        sl1 = self.conv11(x)
        sl1 = self.relu(sl1)
        
        x = self.conv2(x)
        x = self.relu((1-alpha)*x + alpha*sl1)
        # x = self.relu(x + sl1)
        # x = self.relu(x)
        sl2 = x
        
        x = self.conv3(x)
        x = self.relu((1-alpha)*x + alpha*sl2)
        # x = self.relu(x + sl2)
        # x = self.relu(x)
        x = self.pool(x)
        x = self.norm(x)
        
        sl3 = self.conv33(x)
        sl3 = self.relu(sl3)
        
        x = self.conv4(x)
        x = self.relu((1-alpha)*x + alpha*sl3)
        # x = self.relu(x + sl3)
        # x = self.relu(x)
        sl4 = x
        
        x = self.conv5(x)
        x = self.relu((1-alpha)*x + alpha*sl4)
        # x = self.relu(x + sl4)
        # x = self.relu(x)
        x = self.pool(x)
        x = self.norm(x)
        
        x = torch.flatten(x, 1)
        x = self.fc6(x)
        x = self.relu(x)
        x = self.dropout(x)
        
        x = self.fc7(x)
        x_embeds = x
        x = self.relu(x)
        x = self.dropout(x)
        
        x_classification = self.fc8(x)
        
        return x_embeds, x_classification

In [14]:
def init_weights(m):
    if isinstance(m, nn.Conv2d):
        torch.nn.init.kaiming_normal_(m.weight)

In [15]:
best_accu = 0.928253710269928
def objective(trial):
    model = Net(3, 2)
    model.apply(init_weights)
    model = torch.jit.script(model).to(device)
    
    lr = trial.suggest_float("learning_rate", 1e-5, 1e-2, log=True)
    mom = trial.suggest_float('momentum', 0.7, 0.99)
    wd = trial.suggest_float('weight_decay', 1e-5, 1e-1, log=True)
    optimizer = opt.SGD(model.parameters(), lr=lr, momentum=mom, weight_decay=wd)
    
    alpha = trial.suggest_float("alpha", 0.1, 0.5, step=0.1)
    beta = trial.suggest_float("beta", 0.1, 1.0, step=0.1)
    gamma = trial.suggest_float("gamma", 0.1, 1.0, step=0.1)
    criterion = torch.jit.script(CombinedLoss(beta=beta))
    arc_margin = losses.ArcFaceLoss(2, 512, margin=0.5, scale=30.0)
    
    batch_size = trial.suggest_int('batch_size', 10, 100)
    num_epochs = trial.suggest_int('epochs', 20, 100)
    
    print("Learning rate: "+ str(lr))
    print("Momentum: "+ str(mom))
    print("Weight decay: "+ str(wd))
    print("Alpha: "+ str(alpha))
    print("Beta: "+ str(beta))
    print("Gamma: "+ str(gamma))
    print("Batch size: "+ str(batch_size))
    print("Number of epochs: "+ str(num_epochs))
    
    for epoch in tqdm(range(num_epochs), desc="Epochs"):
        train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=4)
        val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False, num_workers=4)
        
        # training loop
        running_loss = []
        running_accu = []
        
        model.train()
        for step, (anchor_img, positive_img, negative_img, anchor_label) in enumerate(tqdm(train_loader, desc="Training", leave=False)):
            anchor_img, positive_img, negative_img, anchor_label = anchor_img.to(device), positive_img.to(device), negative_img.to(device), anchor_label.to(device)

            optimizer.zero_grad()
            anchor_out, anchor_pred = model(anchor_img, alpha)
            positive_out, _ = model(positive_img, alpha)
            negative_out, _ = model(negative_img, alpha)
            
            pred = torch.argmax(anchor_pred, 1)
            accuracy = torch.eq(pred, anchor_label).sum() / len(anchor_img)

            class_triplet_loss = criterion(anchor_out, positive_out, negative_out, anchor_pred, anchor_label)
            arc_loss = arc_margin(anchor_out, anchor_label)
            loss = (gamma * arc_loss) + class_triplet_loss
            loss.backward()
            optimizer.step()

            running_accu.append(accuracy.cpu().detach().numpy())
            running_loss.append(loss.cpu().detach().numpy())
        print("Epoch: {}/{} - Loss: {:.4f} - Accuracy: {:.4f}".format(epoch+1, num_epochs, np.mean(running_loss), np.mean(running_accu)))
        
        # validation loop
        val_loss = []
        val_accu = []

        model.eval()
        with torch.no_grad():
            for img, pos_img, neg_img, label in tqdm(val_loader):
                img, pos_img, neg_img, label = img.to(device), pos_img.to(device), neg_img.to(device), label.to(device)
                
                anchor, output = model(img, alpha)
                pos, _ = model(pos_img, alpha)
                neg, _ = model(neg_img, alpha)
                
                pred = torch.argmax(output, 1)
                accuracy = torch.eq(pred, label).sum() / len(img)
                
                class_triplet_loss = criterion(anchor, pos, neg, output, label)
                arc_loss = arc_margin(anchor, label)
                loss = (gamma * arc_loss) + class_triplet_loss
                
                val_accu.append(accuracy.cpu().detach().numpy())
                val_loss.append(loss.cpu().detach().numpy())
        val_accu = np.mean(val_accu)
        val_loss = np.mean(val_loss)
        print("Val Loss: {:.4f} - Val Accuracy: {:.4f}".format(val_loss, val_accu))
        
        trial.report(val_accu, epoch)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()
    
    global best_accu
    if val_accu > best_accu:
        best_accu = val_accu
        print("Saving best model...")
        torch.save(model.state_dict(), "../incr_skip_arcface_triplet_00.pt")
            
    return val_accu

In [16]:
study = optuna.create_study(direction='maximize',
                            study_name='incremented-skip-alpha-arcface-triplet-libfaceid-study',
                            storage='sqlite:///study1.db',
                            load_if_exists=True)
study.optimize(objective, n_trials=10)

pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

# Display the study statistics
print("\nStudy statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

[32m[I 2023-11-22 18:41:19,032][0m Using an existing study with name 'incremented-skip-alpha-arcface-triplet-libfaceid-study' instead of creating a new one.[0m


Learning rate: 0.0006235105175506891
Momentum: 0.7033420898757605
Weight decay: 1.4555922573846378e-05
Alpha: 0.4
Beta: 1.0
Gamma: 0.1
Batch size: 68
Number of epochs: 64


Epochs:   0%|          | 0/64 [00:00<?, ?it/s]

Training:   0%|          | 0/206 [00:00<?, ?it/s]

Epoch: 1/64 - Loss: 1.7783 - Accuracy: 0.5189


  0%|          | 0/26 [00:00<?, ?it/s]

[32m[I 2023-11-22 18:42:29,632][0m Trial 11 pruned. [0m


Val Loss: 1.7611 - Val Accuracy: 0.5346
Learning rate: 1.5014693815396712e-05
Momentum: 0.9112265748845534
Weight decay: 0.0004395295031275492
Alpha: 0.1
Beta: 0.30000000000000004
Gamma: 0.30000000000000004
Batch size: 11
Number of epochs: 77


Epochs:   0%|          | 0/77 [00:00<?, ?it/s]

Training:   0%|          | 0/1271 [00:00<?, ?it/s]

Epoch: 1/77 - Loss: 1.2269 - Accuracy: 0.5319


  0%|          | 0/159 [00:00<?, ?it/s]

Val Loss: 1.1808 - Val Accuracy: 0.5359


Training:   0%|          | 0/1271 [00:00<?, ?it/s]

Epoch: 2/77 - Loss: 1.1946 - Accuracy: 0.5356


  0%|          | 0/159 [00:00<?, ?it/s]

Val Loss: 1.1693 - Val Accuracy: 0.5359


Training:   0%|          | 0/1271 [00:00<?, ?it/s]

Epoch: 3/77 - Loss: 1.1799 - Accuracy: 0.5381


  0%|          | 0/159 [00:00<?, ?it/s]

Val Loss: 1.1532 - Val Accuracy: 0.5359


Training:   0%|          | 0/1271 [00:00<?, ?it/s]

Epoch: 4/77 - Loss: 1.1660 - Accuracy: 0.5395


  0%|          | 0/159 [00:00<?, ?it/s]

Val Loss: 1.1491 - Val Accuracy: 0.5359


Training:   0%|          | 0/1271 [00:00<?, ?it/s]

Epoch: 5/77 - Loss: 1.1491 - Accuracy: 0.5430


  0%|          | 0/159 [00:00<?, ?it/s]

Val Loss: 1.1216 - Val Accuracy: 0.5359


Training:   0%|          | 0/1271 [00:00<?, ?it/s]

Epoch: 6/77 - Loss: 1.1378 - Accuracy: 0.5468


  0%|          | 0/159 [00:00<?, ?it/s]

Val Loss: 1.1154 - Val Accuracy: 0.5399


Training:   0%|          | 0/1271 [00:00<?, ?it/s]

Epoch: 7/77 - Loss: 1.1232 - Accuracy: 0.5584


  0%|          | 0/159 [00:00<?, ?it/s]

Val Loss: 1.0930 - Val Accuracy: 0.5525


Training:   0%|          | 0/1271 [00:00<?, ?it/s]

Epoch: 8/77 - Loss: 1.1072 - Accuracy: 0.5762


  0%|          | 0/159 [00:00<?, ?it/s]

Val Loss: 1.0761 - Val Accuracy: 0.6108


Training:   0%|          | 0/1271 [00:00<?, ?it/s]

Epoch: 9/77 - Loss: 1.0915 - Accuracy: 0.5953


  0%|          | 0/159 [00:00<?, ?it/s]

Val Loss: 1.0670 - Val Accuracy: 0.6245


Training:   0%|          | 0/1271 [00:00<?, ?it/s]

Epoch: 10/77 - Loss: 1.0793 - Accuracy: 0.6213


  0%|          | 0/159 [00:00<?, ?it/s]

Val Loss: 1.0553 - Val Accuracy: 0.6617


Training:   0%|          | 0/1271 [00:00<?, ?it/s]

Epoch: 11/77 - Loss: 1.0659 - Accuracy: 0.6378


  0%|          | 0/159 [00:00<?, ?it/s]

Val Loss: 1.0346 - Val Accuracy: 0.6937


Training:   0%|          | 0/1271 [00:00<?, ?it/s]

Epoch: 12/77 - Loss: 1.0465 - Accuracy: 0.6739


  0%|          | 0/159 [00:00<?, ?it/s]

Val Loss: 1.0189 - Val Accuracy: 0.7126


Training:   0%|          | 0/1271 [00:00<?, ?it/s]

Epoch: 13/77 - Loss: 1.0365 - Accuracy: 0.7004


  0%|          | 0/159 [00:00<?, ?it/s]

Val Loss: 1.0013 - Val Accuracy: 0.7386


Training:   0%|          | 0/1271 [00:00<?, ?it/s]

Epoch: 14/77 - Loss: 1.0145 - Accuracy: 0.7278


  0%|          | 0/159 [00:00<?, ?it/s]

Val Loss: 0.9880 - Val Accuracy: 0.7114


Training:   0%|          | 0/1271 [00:00<?, ?it/s]

Epoch: 15/77 - Loss: 0.9993 - Accuracy: 0.7451


  0%|          | 0/159 [00:00<?, ?it/s]

Val Loss: 0.9681 - Val Accuracy: 0.7604


Training:   0%|          | 0/1271 [00:00<?, ?it/s]

Epoch: 16/77 - Loss: 0.9851 - Accuracy: 0.7588


  0%|          | 0/159 [00:00<?, ?it/s]

Val Loss: 0.9643 - Val Accuracy: 0.7778


Training:   0%|          | 0/1271 [00:00<?, ?it/s]

Epoch: 17/77 - Loss: 0.9625 - Accuracy: 0.7771


  0%|          | 0/159 [00:00<?, ?it/s]

Val Loss: 0.9411 - Val Accuracy: 0.8007


Training:   0%|          | 0/1271 [00:00<?, ?it/s]

Epoch: 18/77 - Loss: 0.9492 - Accuracy: 0.7854


  0%|          | 0/159 [00:00<?, ?it/s]

Val Loss: 0.9256 - Val Accuracy: 0.8081


Training:   0%|          | 0/1271 [00:00<?, ?it/s]

Epoch: 19/77 - Loss: 0.9281 - Accuracy: 0.7972


  0%|          | 0/159 [00:00<?, ?it/s]

Val Loss: 0.9042 - Val Accuracy: 0.8070


Training:   0%|          | 0/1271 [00:00<?, ?it/s]

Epoch: 20/77 - Loss: 0.9074 - Accuracy: 0.8078


  0%|          | 0/159 [00:00<?, ?it/s]

Val Loss: 0.9089 - Val Accuracy: 0.8041


Training:   0%|          | 0/1271 [00:00<?, ?it/s]

Epoch: 21/77 - Loss: 0.8884 - Accuracy: 0.8200


  0%|          | 0/159 [00:00<?, ?it/s]

Val Loss: 0.8796 - Val Accuracy: 0.8009


Training:   0%|          | 0/1271 [00:00<?, ?it/s]

Epoch: 22/77 - Loss: 0.8685 - Accuracy: 0.8232


  0%|          | 0/159 [00:00<?, ?it/s]

Val Loss: 0.8529 - Val Accuracy: 0.8316


Training:   0%|          | 0/1271 [00:00<?, ?it/s]

Epoch: 23/77 - Loss: 0.8435 - Accuracy: 0.8304


  0%|          | 0/159 [00:00<?, ?it/s]

Val Loss: 0.8427 - Val Accuracy: 0.8339


Training:   0%|          | 0/1271 [00:00<?, ?it/s]

Epoch: 24/77 - Loss: 0.8221 - Accuracy: 0.8358


  0%|          | 0/159 [00:00<?, ?it/s]

Val Loss: 0.8165 - Val Accuracy: 0.8350


Training:   0%|          | 0/1271 [00:00<?, ?it/s]

Epoch: 25/77 - Loss: 0.8029 - Accuracy: 0.8449


  0%|          | 0/159 [00:00<?, ?it/s]

Val Loss: 0.8122 - Val Accuracy: 0.8407


Training:   0%|          | 0/1271 [00:00<?, ?it/s]

Epoch: 26/77 - Loss: 0.7785 - Accuracy: 0.8532


  0%|          | 0/159 [00:00<?, ?it/s]

Val Loss: 0.7904 - Val Accuracy: 0.8419


Training:   0%|          | 0/1271 [00:00<?, ?it/s]

Epoch: 27/77 - Loss: 0.7541 - Accuracy: 0.8566


  0%|          | 0/159 [00:00<?, ?it/s]

Val Loss: 0.7683 - Val Accuracy: 0.8430


Training:   0%|          | 0/1271 [00:00<?, ?it/s]

Epoch: 28/77 - Loss: 0.7354 - Accuracy: 0.8624


  0%|          | 0/159 [00:00<?, ?it/s]

Val Loss: 0.7518 - Val Accuracy: 0.8430


Training:   0%|          | 0/1271 [00:00<?, ?it/s]

Epoch: 29/77 - Loss: 0.7134 - Accuracy: 0.8709


  0%|          | 0/159 [00:00<?, ?it/s]

Val Loss: 0.7435 - Val Accuracy: 0.8467


Training:   0%|          | 0/1271 [00:00<?, ?it/s]

Epoch: 30/77 - Loss: 0.6897 - Accuracy: 0.8749


  0%|          | 0/159 [00:00<?, ?it/s]

Val Loss: 0.7439 - Val Accuracy: 0.8419


Training:   0%|          | 0/1271 [00:00<?, ?it/s]

Epoch: 31/77 - Loss: 0.6580 - Accuracy: 0.8822


  0%|          | 0/159 [00:00<?, ?it/s]

Val Loss: 0.7241 - Val Accuracy: 0.8413


Training:   0%|          | 0/1271 [00:00<?, ?it/s]

Epoch: 32/77 - Loss: 0.6435 - Accuracy: 0.8857


  0%|          | 0/159 [00:00<?, ?it/s]

Val Loss: 0.7204 - Val Accuracy: 0.8499


Training:   0%|          | 0/1271 [00:00<?, ?it/s]

Epoch: 33/77 - Loss: 0.6199 - Accuracy: 0.8919


  0%|          | 0/159 [00:00<?, ?it/s]

Val Loss: 0.6625 - Val Accuracy: 0.8605


Training:   0%|          | 0/1271 [00:00<?, ?it/s]

Epoch: 34/77 - Loss: 0.6001 - Accuracy: 0.8957


  0%|          | 0/159 [00:00<?, ?it/s]

Val Loss: 0.7050 - Val Accuracy: 0.8390


Training:   0%|          | 0/1271 [00:00<?, ?it/s]

Epoch: 35/77 - Loss: 0.5784 - Accuracy: 0.8973


  0%|          | 0/159 [00:00<?, ?it/s]

Val Loss: 0.6932 - Val Accuracy: 0.8533


Training:   0%|          | 0/1271 [00:00<?, ?it/s]

Epoch: 36/77 - Loss: 0.5524 - Accuracy: 0.9042


  0%|          | 0/159 [00:00<?, ?it/s]

Val Loss: 0.6358 - Val Accuracy: 0.8616


Training:   0%|          | 0/1271 [00:00<?, ?it/s]

Epoch: 37/77 - Loss: 0.5357 - Accuracy: 0.9103


  0%|          | 0/159 [00:00<?, ?it/s]

Val Loss: 0.6289 - Val Accuracy: 0.8576


Training:   0%|          | 0/1271 [00:00<?, ?it/s]

Epoch: 38/77 - Loss: 0.5124 - Accuracy: 0.9130


  0%|          | 0/159 [00:00<?, ?it/s]

Val Loss: 0.6547 - Val Accuracy: 0.8611


Training:   0%|          | 0/1271 [00:00<?, ?it/s]

Epoch: 39/77 - Loss: 0.5001 - Accuracy: 0.9126


  0%|          | 0/159 [00:00<?, ?it/s]

Val Loss: 0.6103 - Val Accuracy: 0.8696


Training:   0%|          | 0/1271 [00:00<?, ?it/s]

Epoch: 40/77 - Loss: 0.4728 - Accuracy: 0.9240


  0%|          | 0/159 [00:00<?, ?it/s]

Val Loss: 0.5839 - Val Accuracy: 0.8816


Training:   0%|          | 0/1271 [00:00<?, ?it/s]

Epoch: 41/77 - Loss: 0.4508 - Accuracy: 0.9267


  0%|          | 0/159 [00:00<?, ?it/s]

Val Loss: 0.5829 - Val Accuracy: 0.8834


Training:   0%|          | 0/1271 [00:00<?, ?it/s]

Epoch: 42/77 - Loss: 0.4342 - Accuracy: 0.9285


  0%|          | 0/159 [00:00<?, ?it/s]

Val Loss: 0.5837 - Val Accuracy: 0.8811


Training:   0%|          | 0/1271 [00:00<?, ?it/s]

Epoch: 43/77 - Loss: 0.4134 - Accuracy: 0.9348


  0%|          | 0/159 [00:00<?, ?it/s]

Val Loss: 0.5689 - Val Accuracy: 0.8856


Training:   0%|          | 0/1271 [00:00<?, ?it/s]

Epoch: 44/77 - Loss: 0.3941 - Accuracy: 0.9422


  0%|          | 0/159 [00:00<?, ?it/s]

Val Loss: 0.5485 - Val Accuracy: 0.8805


Training:   0%|          | 0/1271 [00:00<?, ?it/s]

Epoch: 45/77 - Loss: 0.3737 - Accuracy: 0.9446


  0%|          | 0/159 [00:00<?, ?it/s]

Val Loss: 0.5482 - Val Accuracy: 0.8908


Training:   0%|          | 0/1271 [00:00<?, ?it/s]

Epoch: 46/77 - Loss: 0.3566 - Accuracy: 0.9470


  0%|          | 0/159 [00:00<?, ?it/s]

Val Loss: 0.5310 - Val Accuracy: 0.8856


Training:   0%|          | 0/1271 [00:00<?, ?it/s]

Epoch: 47/77 - Loss: 0.3399 - Accuracy: 0.9496


  0%|          | 0/159 [00:00<?, ?it/s]

[32m[I 2023-11-22 19:56:53,812][0m Trial 12 pruned. [0m


Val Loss: 0.5579 - Val Accuracy: 0.8879
Learning rate: 6.446694620659784e-05
Momentum: 0.9827657294877697
Weight decay: 0.007253273183017979
Alpha: 0.2
Beta: 0.6
Gamma: 0.4
Batch size: 44
Number of epochs: 51


Epochs:   0%|          | 0/51 [00:00<?, ?it/s]

Training:   0%|          | 0/318 [00:00<?, ?it/s]

Epoch: 1/51 - Loss: 1.6172 - Accuracy: 0.5361


  0%|          | 0/40 [00:00<?, ?it/s]

[32m[I 2023-11-22 19:58:02,520][0m Trial 13 pruned. [0m


Val Loss: 1.5586 - Val Accuracy: 0.5351
Learning rate: 4.483682495515186e-05
Momentum: 0.8896072364781766
Weight decay: 0.04213228074859547
Alpha: 0.30000000000000004
Beta: 0.1
Gamma: 0.8
Batch size: 36
Number of epochs: 74


Epochs:   0%|          | 0/74 [00:00<?, ?it/s]

Training:   0%|          | 0/389 [00:00<?, ?it/s]

Epoch: 1/74 - Loss: 1.4696 - Accuracy: 0.4663


  0%|          | 0/49 [00:00<?, ?it/s]

[32m[I 2023-11-22 19:59:16,622][0m Trial 14 pruned. [0m


Val Loss: 1.3558 - Val Accuracy: 0.4662
Learning rate: 0.00015712870265123736
Momentum: 0.9381421587395706
Weight decay: 0.0007118166324526656
Alpha: 0.1
Beta: 0.4
Gamma: 0.4
Batch size: 74
Number of epochs: 20


Epochs:   0%|          | 0/20 [00:00<?, ?it/s]

Training:   0%|          | 0/189 [00:00<?, ?it/s]

Epoch: 1/20 - Loss: 1.4170 - Accuracy: 0.5339


  0%|          | 0/24 [00:00<?, ?it/s]

[32m[I 2023-11-22 20:00:23,433][0m Trial 15 pruned. [0m


Val Loss: 1.3445 - Val Accuracy: 0.5346
Learning rate: 0.0003710557372201616
Momentum: 0.8222236151791261
Weight decay: 0.0028156676930909575
Alpha: 0.2
Beta: 0.8
Gamma: 0.2
Batch size: 58
Number of epochs: 100


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 1/100 - Loss: 1.6584 - Accuracy: 0.5308


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 1.6203 - Val Accuracy: 0.5377


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 2/100 - Loss: 1.6309 - Accuracy: 0.5373


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 1.6080 - Val Accuracy: 0.5377


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 3/100 - Loss: 1.6167 - Accuracy: 0.5401


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 1.5869 - Val Accuracy: 0.5377


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 4/100 - Loss: 1.5905 - Accuracy: 0.5473


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 1.5565 - Val Accuracy: 0.5388


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 5/100 - Loss: 1.5516 - Accuracy: 0.5627


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 1.4941 - Val Accuracy: 0.6376


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 6/100 - Loss: 1.5133 - Accuracy: 0.6072


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 1.4428 - Val Accuracy: 0.6877


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 7/100 - Loss: 1.4666 - Accuracy: 0.6582


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 1.3641 - Val Accuracy: 0.7308


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 8/100 - Loss: 1.4117 - Accuracy: 0.7178


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 1.3444 - Val Accuracy: 0.7511


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 9/100 - Loss: 1.3478 - Accuracy: 0.7594


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 1.2805 - Val Accuracy: 0.8037


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 10/100 - Loss: 1.2977 - Accuracy: 0.7846


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 1.1923 - Val Accuracy: 0.8070


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 11/100 - Loss: 1.2388 - Accuracy: 0.8038


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 1.1845 - Val Accuracy: 0.8287


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 12/100 - Loss: 1.1757 - Accuracy: 0.8224


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 1.1694 - Val Accuracy: 0.7961


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 13/100 - Loss: 1.1043 - Accuracy: 0.8392


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 1.0667 - Val Accuracy: 0.8398


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 14/100 - Loss: 1.0538 - Accuracy: 0.8475


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 1.0477 - Val Accuracy: 0.8504


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 15/100 - Loss: 0.9910 - Accuracy: 0.8650


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.9847 - Val Accuracy: 0.8465


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 16/100 - Loss: 0.9234 - Accuracy: 0.8741


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.9420 - Val Accuracy: 0.8539


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 17/100 - Loss: 0.8581 - Accuracy: 0.8843


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.9495 - Val Accuracy: 0.8612


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 18/100 - Loss: 0.7969 - Accuracy: 0.8943


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.8815 - Val Accuracy: 0.8632


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 19/100 - Loss: 0.7397 - Accuracy: 0.9074


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.8103 - Val Accuracy: 0.8799


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 20/100 - Loss: 0.6751 - Accuracy: 0.9176


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.8005 - Val Accuracy: 0.8732


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 21/100 - Loss: 0.6226 - Accuracy: 0.9223


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.7880 - Val Accuracy: 0.8860


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 22/100 - Loss: 0.5660 - Accuracy: 0.9309


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.8142 - Val Accuracy: 0.8687


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 23/100 - Loss: 0.5276 - Accuracy: 0.9369


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.7309 - Val Accuracy: 0.8910


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 24/100 - Loss: 0.4704 - Accuracy: 0.9438


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.7005 - Val Accuracy: 0.9027


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 25/100 - Loss: 0.4232 - Accuracy: 0.9526


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.7288 - Val Accuracy: 0.8943


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 26/100 - Loss: 0.3882 - Accuracy: 0.9590


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.6694 - Val Accuracy: 0.9004


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 27/100 - Loss: 0.3464 - Accuracy: 0.9640


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.6566 - Val Accuracy: 0.8990


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 28/100 - Loss: 0.3151 - Accuracy: 0.9679


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.6686 - Val Accuracy: 0.8904


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 29/100 - Loss: 0.2893 - Accuracy: 0.9711


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.6187 - Val Accuracy: 0.9138


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 30/100 - Loss: 0.2553 - Accuracy: 0.9771


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.6645 - Val Accuracy: 0.9071


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 31/100 - Loss: 0.2304 - Accuracy: 0.9782


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.6488 - Val Accuracy: 0.9043


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 32/100 - Loss: 0.2106 - Accuracy: 0.9826


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.6876 - Val Accuracy: 0.9088


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 33/100 - Loss: 0.1955 - Accuracy: 0.9839


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.6911 - Val Accuracy: 0.9043


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 34/100 - Loss: 0.1675 - Accuracy: 0.9870


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.6738 - Val Accuracy: 0.9066


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 35/100 - Loss: 0.1497 - Accuracy: 0.9898


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.6596 - Val Accuracy: 0.8999


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 36/100 - Loss: 0.1373 - Accuracy: 0.9902


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.6545 - Val Accuracy: 0.9182


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 37/100 - Loss: 0.1295 - Accuracy: 0.9920


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.6711 - Val Accuracy: 0.9127


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 38/100 - Loss: 0.1137 - Accuracy: 0.9933


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.6780 - Val Accuracy: 0.9088


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 39/100 - Loss: 0.1114 - Accuracy: 0.9944


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.6499 - Val Accuracy: 0.9199


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 40/100 - Loss: 0.0979 - Accuracy: 0.9944


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.6748 - Val Accuracy: 0.9182


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 41/100 - Loss: 0.1028 - Accuracy: 0.9936


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.6830 - Val Accuracy: 0.9029


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 42/100 - Loss: 0.0925 - Accuracy: 0.9946


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.7048 - Val Accuracy: 0.9160


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 43/100 - Loss: 0.0823 - Accuracy: 0.9955


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.7694 - Val Accuracy: 0.8829


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 44/100 - Loss: 0.0846 - Accuracy: 0.9947


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.6912 - Val Accuracy: 0.9166


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 45/100 - Loss: 0.0738 - Accuracy: 0.9953


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.6303 - Val Accuracy: 0.9160


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 46/100 - Loss: 0.0599 - Accuracy: 0.9977


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.6891 - Val Accuracy: 0.9166


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 47/100 - Loss: 0.0681 - Accuracy: 0.9964


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.6696 - Val Accuracy: 0.9143


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 48/100 - Loss: 0.0635 - Accuracy: 0.9979


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.7074 - Val Accuracy: 0.9099


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 49/100 - Loss: 0.0572 - Accuracy: 0.9976


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.7021 - Val Accuracy: 0.9132


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 50/100 - Loss: 0.0562 - Accuracy: 0.9979


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.6917 - Val Accuracy: 0.9151


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 51/100 - Loss: 0.0524 - Accuracy: 0.9981


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.7082 - Val Accuracy: 0.9132


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 52/100 - Loss: 0.0511 - Accuracy: 0.9982


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.8325 - Val Accuracy: 0.9049


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 53/100 - Loss: 0.0489 - Accuracy: 0.9981


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.7548 - Val Accuracy: 0.9210


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 54/100 - Loss: 0.0408 - Accuracy: 0.9991


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.7801 - Val Accuracy: 0.9194


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 55/100 - Loss: 0.0366 - Accuracy: 0.9984


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.7597 - Val Accuracy: 0.9110


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 56/100 - Loss: 0.0421 - Accuracy: 0.9981


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.7698 - Val Accuracy: 0.9171


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 57/100 - Loss: 0.0425 - Accuracy: 0.9986


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.7127 - Val Accuracy: 0.9149


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 58/100 - Loss: 0.0428 - Accuracy: 0.9984


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.7350 - Val Accuracy: 0.9132


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 59/100 - Loss: 0.0356 - Accuracy: 0.9994


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.7042 - Val Accuracy: 0.9255


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 60/100 - Loss: 0.0290 - Accuracy: 0.9994


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.7558 - Val Accuracy: 0.9166


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 61/100 - Loss: 0.0379 - Accuracy: 0.9985


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.7110 - Val Accuracy: 0.9205


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 62/100 - Loss: 0.0364 - Accuracy: 0.9989


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.7246 - Val Accuracy: 0.9132


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 63/100 - Loss: 0.0379 - Accuracy: 0.9989


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.7532 - Val Accuracy: 0.9199


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 64/100 - Loss: 0.0359 - Accuracy: 0.9986


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.7932 - Val Accuracy: 0.9079


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 65/100 - Loss: 0.0316 - Accuracy: 0.9991


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.7864 - Val Accuracy: 0.9205


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 66/100 - Loss: 0.0306 - Accuracy: 0.9993


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.6689 - Val Accuracy: 0.9260


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 67/100 - Loss: 0.0264 - Accuracy: 0.9991


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.7853 - Val Accuracy: 0.9149


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 68/100 - Loss: 0.0239 - Accuracy: 0.9995


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.7507 - Val Accuracy: 0.9129


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 69/100 - Loss: 0.0218 - Accuracy: 0.9995


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.7935 - Val Accuracy: 0.9188


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 70/100 - Loss: 0.0247 - Accuracy: 0.9995


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.8703 - Val Accuracy: 0.9129


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 71/100 - Loss: 0.0244 - Accuracy: 0.9994


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.7133 - Val Accuracy: 0.9194


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 72/100 - Loss: 0.0301 - Accuracy: 0.9992


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.7182 - Val Accuracy: 0.9249


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 73/100 - Loss: 0.0245 - Accuracy: 0.9992


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.7789 - Val Accuracy: 0.9166


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 74/100 - Loss: 0.0254 - Accuracy: 0.9986


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.7495 - Val Accuracy: 0.9194


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 75/100 - Loss: 0.0200 - Accuracy: 0.9996


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.8192 - Val Accuracy: 0.9205


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 76/100 - Loss: 0.0203 - Accuracy: 0.9995


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.7969 - Val Accuracy: 0.9216


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 77/100 - Loss: 0.0194 - Accuracy: 0.9997


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.8143 - Val Accuracy: 0.9016


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 78/100 - Loss: 0.0201 - Accuracy: 0.9997


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.7804 - Val Accuracy: 0.9244


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 79/100 - Loss: 0.0226 - Accuracy: 0.9991


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.7938 - Val Accuracy: 0.9090


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 80/100 - Loss: 0.0171 - Accuracy: 0.9996


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.7985 - Val Accuracy: 0.9188


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 81/100 - Loss: 0.0153 - Accuracy: 0.9999


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.8093 - Val Accuracy: 0.9221


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 82/100 - Loss: 0.0198 - Accuracy: 0.9996


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.8169 - Val Accuracy: 0.9135


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 83/100 - Loss: 0.0226 - Accuracy: 0.9992


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.7929 - Val Accuracy: 0.9166


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 84/100 - Loss: 0.0196 - Accuracy: 0.9995


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.7402 - Val Accuracy: 0.9232


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 85/100 - Loss: 0.0174 - Accuracy: 0.9994


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.7874 - Val Accuracy: 0.9227


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 86/100 - Loss: 0.0146 - Accuracy: 0.9999


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.8294 - Val Accuracy: 0.9149


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 87/100 - Loss: 0.0172 - Accuracy: 0.9995


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.9187 - Val Accuracy: 0.9110


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 88/100 - Loss: 0.0162 - Accuracy: 0.9998


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.8774 - Val Accuracy: 0.9171


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 89/100 - Loss: 0.0247 - Accuracy: 0.9995


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.7936 - Val Accuracy: 0.9112


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 90/100 - Loss: 0.0195 - Accuracy: 0.9997


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.8012 - Val Accuracy: 0.9232


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 91/100 - Loss: 0.0170 - Accuracy: 0.9997


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.7126 - Val Accuracy: 0.9260


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 92/100 - Loss: 0.0165 - Accuracy: 0.9997


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.8382 - Val Accuracy: 0.9188


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 93/100 - Loss: 0.0135 - Accuracy: 0.9998


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.7708 - Val Accuracy: 0.9194


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 94/100 - Loss: 0.0165 - Accuracy: 0.9999


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.7187 - Val Accuracy: 0.9199


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 95/100 - Loss: 0.0156 - Accuracy: 0.9996


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.8746 - Val Accuracy: 0.8996


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 96/100 - Loss: 0.0146 - Accuracy: 0.9998


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.9174 - Val Accuracy: 0.9149


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 97/100 - Loss: 0.0158 - Accuracy: 0.9995


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.7970 - Val Accuracy: 0.9127


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 98/100 - Loss: 0.0151 - Accuracy: 0.9997


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.7477 - Val Accuracy: 0.9116


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 99/100 - Loss: 0.0128 - Accuracy: 0.9999


  0%|          | 0/31 [00:00<?, ?it/s]

Val Loss: 0.8317 - Val Accuracy: 0.9099


Training:   0%|          | 0/241 [00:00<?, ?it/s]

Epoch: 100/100 - Loss: 0.0182 - Accuracy: 0.9996


  0%|          | 0/31 [00:00<?, ?it/s]

[32m[I 2023-11-22 21:35:15,528][0m Trial 16 finished with value: 0.9210233092308044 and parameters: {'learning_rate': 0.0003710557372201616, 'momentum': 0.8222236151791261, 'weight_decay': 0.0028156676930909575, 'alpha': 0.2, 'beta': 0.8, 'gamma': 0.2, 'batch_size': 58, 'epochs': 100}. Best is trial 10 with value: 0.928253710269928.[0m


Val Loss: 0.7796 - Val Accuracy: 0.9210
Learning rate: 0.0004810337573037552
Momentum: 0.810889035657051
Weight decay: 0.003099292904248615
Alpha: 0.1
Beta: 1.0
Gamma: 0.1
Batch size: 61
Number of epochs: 84


Epochs:   0%|          | 0/84 [00:00<?, ?it/s]

Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 1/84 - Loss: 1.7881 - Accuracy: 0.4889


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 1.7580 - Val Accuracy: 0.5845


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 2/84 - Loss: 1.7562 - Accuracy: 0.5461


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 1.7363 - Val Accuracy: 0.5351


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 3/84 - Loss: 1.7308 - Accuracy: 0.5563


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 1.6994 - Val Accuracy: 0.5351


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 4/84 - Loss: 1.6866 - Accuracy: 0.5640


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 1.6317 - Val Accuracy: 0.5822


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 5/84 - Loss: 1.6315 - Accuracy: 0.6178


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 1.5589 - Val Accuracy: 0.6252


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 6/84 - Loss: 1.5845 - Accuracy: 0.6750


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 1.5156 - Val Accuracy: 0.7485


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 7/84 - Loss: 1.5156 - Accuracy: 0.7313


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 1.4078 - Val Accuracy: 0.7638


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 8/84 - Loss: 1.4465 - Accuracy: 0.7639


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 1.3706 - Val Accuracy: 0.7958


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 9/84 - Loss: 1.3815 - Accuracy: 0.7953


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 1.3028 - Val Accuracy: 0.7892


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 10/84 - Loss: 1.3026 - Accuracy: 0.8137


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 1.2244 - Val Accuracy: 0.8247


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 11/84 - Loss: 1.2150 - Accuracy: 0.8359


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 1.2279 - Val Accuracy: 0.7923


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 12/84 - Loss: 1.1426 - Accuracy: 0.8512


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 1.1248 - Val Accuracy: 0.8439


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 13/84 - Loss: 1.0515 - Accuracy: 0.8682


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 1.0308 - Val Accuracy: 0.8512


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 14/84 - Loss: 0.9659 - Accuracy: 0.8837


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 1.0138 - Val Accuracy: 0.8650


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 15/84 - Loss: 0.9076 - Accuracy: 0.8922


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 1.0097 - Val Accuracy: 0.8546


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 16/84 - Loss: 0.8080 - Accuracy: 0.9080


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.9748 - Val Accuracy: 0.8674


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 17/84 - Loss: 0.7374 - Accuracy: 0.9190


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.8754 - Val Accuracy: 0.8830


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 18/84 - Loss: 0.6649 - Accuracy: 0.9259


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.8494 - Val Accuracy: 0.8778


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 19/84 - Loss: 0.5802 - Accuracy: 0.9413


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.8410 - Val Accuracy: 0.8825


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 20/84 - Loss: 0.5479 - Accuracy: 0.9401


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.7653 - Val Accuracy: 0.8904


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 21/84 - Loss: 0.4714 - Accuracy: 0.9551


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.8832 - Val Accuracy: 0.8397


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 22/84 - Loss: 0.4344 - Accuracy: 0.9584


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.8399 - Val Accuracy: 0.8691


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 23/84 - Loss: 0.4102 - Accuracy: 0.9583


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.7290 - Val Accuracy: 0.8949


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 24/84 - Loss: 0.3415 - Accuracy: 0.9691


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.7995 - Val Accuracy: 0.8962


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 25/84 - Loss: 0.2996 - Accuracy: 0.9749


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.9061 - Val Accuracy: 0.8844


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 26/84 - Loss: 0.3202 - Accuracy: 0.9693


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.6846 - Val Accuracy: 0.8998


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 27/84 - Loss: 0.2443 - Accuracy: 0.9798


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.7630 - Val Accuracy: 0.9000


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 28/84 - Loss: 0.2302 - Accuracy: 0.9828


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.6382 - Val Accuracy: 0.9105


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 29/84 - Loss: 0.2044 - Accuracy: 0.9847


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.7677 - Val Accuracy: 0.9026


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 30/84 - Loss: 0.1842 - Accuracy: 0.9879


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.8197 - Val Accuracy: 0.8921


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 31/84 - Loss: 0.1837 - Accuracy: 0.9862


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.8251 - Val Accuracy: 0.9083


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 32/84 - Loss: 0.1610 - Accuracy: 0.9890


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.7733 - Val Accuracy: 0.9164


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 33/84 - Loss: 0.1347 - Accuracy: 0.9922


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.7762 - Val Accuracy: 0.9098


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 34/84 - Loss: 0.1289 - Accuracy: 0.9917


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.7343 - Val Accuracy: 0.9070


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 35/84 - Loss: 0.1278 - Accuracy: 0.9922


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.7620 - Val Accuracy: 0.9049


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 36/84 - Loss: 0.1169 - Accuracy: 0.9925


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.8832 - Val Accuracy: 0.8938


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 37/84 - Loss: 0.1209 - Accuracy: 0.9929


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.7697 - Val Accuracy: 0.9130


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 38/84 - Loss: 0.1009 - Accuracy: 0.9954


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.7656 - Val Accuracy: 0.9053


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 39/84 - Loss: 0.1254 - Accuracy: 0.9928


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.8159 - Val Accuracy: 0.8985


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 40/84 - Loss: 0.0921 - Accuracy: 0.9957


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.7400 - Val Accuracy: 0.9183


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 41/84 - Loss: 0.0899 - Accuracy: 0.9947


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.8264 - Val Accuracy: 0.9083


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 42/84 - Loss: 0.0791 - Accuracy: 0.9962


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.8402 - Val Accuracy: 0.9100


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 43/84 - Loss: 0.0717 - Accuracy: 0.9961


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.8849 - Val Accuracy: 0.8889


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 44/84 - Loss: 0.0812 - Accuracy: 0.9960


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.7413 - Val Accuracy: 0.9232


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 45/84 - Loss: 0.0699 - Accuracy: 0.9972


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.7402 - Val Accuracy: 0.9160


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 46/84 - Loss: 0.0573 - Accuracy: 0.9979


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.7677 - Val Accuracy: 0.9053


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 47/84 - Loss: 0.0559 - Accuracy: 0.9984


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.7278 - Val Accuracy: 0.9188


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 48/84 - Loss: 0.0490 - Accuracy: 0.9983


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.7965 - Val Accuracy: 0.9028


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 49/84 - Loss: 0.0624 - Accuracy: 0.9968


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.7644 - Val Accuracy: 0.9205


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 50/84 - Loss: 0.0543 - Accuracy: 0.9977


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.8300 - Val Accuracy: 0.9202


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 51/84 - Loss: 0.0465 - Accuracy: 0.9987


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.8754 - Val Accuracy: 0.9028


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 52/84 - Loss: 0.0538 - Accuracy: 0.9986


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.7484 - Val Accuracy: 0.9220


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 53/84 - Loss: 0.0530 - Accuracy: 0.9979


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.7889 - Val Accuracy: 0.9171


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 54/84 - Loss: 0.0486 - Accuracy: 0.9980


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.7570 - Val Accuracy: 0.9166


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 55/84 - Loss: 0.0463 - Accuracy: 0.9984


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.7938 - Val Accuracy: 0.9177


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 56/84 - Loss: 0.0411 - Accuracy: 0.9989


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.6854 - Val Accuracy: 0.9239


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 57/84 - Loss: 0.0344 - Accuracy: 0.9989


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.8948 - Val Accuracy: 0.9092


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 58/84 - Loss: 0.0396 - Accuracy: 0.9994


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.8794 - Val Accuracy: 0.9098


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 59/84 - Loss: 0.0316 - Accuracy: 0.9991


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.9816 - Val Accuracy: 0.9049


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 60/84 - Loss: 0.0370 - Accuracy: 0.9989


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.9439 - Val Accuracy: 0.9120


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 61/84 - Loss: 0.0306 - Accuracy: 0.9991


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.9197 - Val Accuracy: 0.9177


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 62/84 - Loss: 0.0332 - Accuracy: 0.9992


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.8660 - Val Accuracy: 0.9177


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 63/84 - Loss: 0.0364 - Accuracy: 0.9984


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.8423 - Val Accuracy: 0.9168


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 64/84 - Loss: 0.0360 - Accuracy: 0.9986


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.9135 - Val Accuracy: 0.8944


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 65/84 - Loss: 0.0596 - Accuracy: 0.9969


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.8229 - Val Accuracy: 0.9132


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 66/84 - Loss: 0.0328 - Accuracy: 0.9991


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.9588 - Val Accuracy: 0.9070


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 67/84 - Loss: 0.0309 - Accuracy: 0.9994


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.8601 - Val Accuracy: 0.9130


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 68/84 - Loss: 0.0343 - Accuracy: 0.9988


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.9146 - Val Accuracy: 0.9143


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 69/84 - Loss: 0.0368 - Accuracy: 0.9986


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.8524 - Val Accuracy: 0.9105


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 70/84 - Loss: 0.0284 - Accuracy: 0.9991


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.8244 - Val Accuracy: 0.9153


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 71/84 - Loss: 0.0297 - Accuracy: 0.9994


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.8451 - Val Accuracy: 0.9187


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 72/84 - Loss: 0.0228 - Accuracy: 0.9996


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.8738 - Val Accuracy: 0.9128


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 73/84 - Loss: 0.0307 - Accuracy: 0.9996


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.8436 - Val Accuracy: 0.9209


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 74/84 - Loss: 0.0269 - Accuracy: 0.9992


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.8003 - Val Accuracy: 0.9228


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 75/84 - Loss: 0.0291 - Accuracy: 0.9991


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.8331 - Val Accuracy: 0.9177


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 76/84 - Loss: 0.0260 - Accuracy: 0.9998


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.9460 - Val Accuracy: 0.9126


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 77/84 - Loss: 0.0271 - Accuracy: 0.9989


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.9106 - Val Accuracy: 0.9215


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 78/84 - Loss: 0.0222 - Accuracy: 0.9995


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.9205 - Val Accuracy: 0.9151


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 79/84 - Loss: 0.0268 - Accuracy: 0.9987


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.8773 - Val Accuracy: 0.9126


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 80/84 - Loss: 0.0253 - Accuracy: 0.9995


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.8677 - Val Accuracy: 0.9188


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 81/84 - Loss: 0.0192 - Accuracy: 0.9997


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.9041 - Val Accuracy: 0.9173


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 82/84 - Loss: 0.0203 - Accuracy: 0.9997


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.8999 - Val Accuracy: 0.9143


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 83/84 - Loss: 0.0207 - Accuracy: 0.9996


  0%|          | 0/29 [00:00<?, ?it/s]

Val Loss: 0.8379 - Val Accuracy: 0.9188


Training:   0%|          | 0/230 [00:00<?, ?it/s]

Epoch: 84/84 - Loss: 0.0206 - Accuracy: 0.9999


  0%|          | 0/29 [00:00<?, ?it/s]

[32m[I 2023-11-22 22:53:25,898][0m Trial 17 finished with value: 0.9146714806556702 and parameters: {'learning_rate': 0.0004810337573037552, 'momentum': 0.810889035657051, 'weight_decay': 0.003099292904248615, 'alpha': 0.1, 'beta': 1.0, 'gamma': 0.1, 'batch_size': 61, 'epochs': 84}. Best is trial 10 with value: 0.928253710269928.[0m


Val Loss: 0.9194 - Val Accuracy: 0.9147
Learning rate: 0.0022253058814321386
Momentum: 0.7494344467460066
Weight decay: 0.02299864240002335
Alpha: 0.30000000000000004
Beta: 0.8
Gamma: 0.2
Batch size: 83
Number of epochs: 60


Epochs:   0%|          | 0/60 [00:00<?, ?it/s]

Training:   0%|          | 0/169 [00:00<?, ?it/s]

Epoch: 1/60 - Loss: 1.6544 - Accuracy: 0.5328


  0%|          | 0/22 [00:00<?, ?it/s]

[32m[I 2023-11-22 22:54:31,985][0m Trial 18 pruned. [0m


Val Loss: 1.6270 - Val Accuracy: 0.5342
Learning rate: 0.006331738628921463
Momentum: 0.8283084782930007
Weight decay: 0.002877223544130165
Alpha: 0.2
Beta: 0.8
Gamma: 0.2
Batch size: 56
Number of epochs: 51


Epochs:   0%|          | 0/51 [00:00<?, ?it/s]

Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 1/51 - Loss: 1.6472 - Accuracy: 0.5394


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 1.5207 - Val Accuracy: 0.5839


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 2/51 - Loss: 1.4007 - Accuracy: 0.7197


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 1.2228 - Val Accuracy: 0.7753


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 3/51 - Loss: 1.0749 - Accuracy: 0.8113


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 0.9857 - Val Accuracy: 0.8191


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 4/51 - Loss: 0.8751 - Accuracy: 0.8525


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 0.8528 - Val Accuracy: 0.8392


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 5/51 - Loss: 0.7273 - Accuracy: 0.8809


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 0.7429 - Val Accuracy: 0.8788


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 6/51 - Loss: 0.5963 - Accuracy: 0.9100


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 0.7896 - Val Accuracy: 0.8552


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 7/51 - Loss: 0.4951 - Accuracy: 0.9262


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 0.6514 - Val Accuracy: 0.8966


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 8/51 - Loss: 0.4293 - Accuracy: 0.9377


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 0.6157 - Val Accuracy: 0.8978


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 9/51 - Loss: 0.3398 - Accuracy: 0.9511


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 0.5776 - Val Accuracy: 0.9100


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 10/51 - Loss: 0.2955 - Accuracy: 0.9577


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 0.5714 - Val Accuracy: 0.9117


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 11/51 - Loss: 0.2470 - Accuracy: 0.9654


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 0.5926 - Val Accuracy: 0.9095


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 12/51 - Loss: 0.2165 - Accuracy: 0.9695


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 0.6136 - Val Accuracy: 0.9090


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 13/51 - Loss: 0.1752 - Accuracy: 0.9774


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 0.6008 - Val Accuracy: 0.9184


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 14/51 - Loss: 0.1589 - Accuracy: 0.9786


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 0.5527 - Val Accuracy: 0.9206


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 15/51 - Loss: 0.1206 - Accuracy: 0.9866


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 0.6540 - Val Accuracy: 0.9145


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 16/51 - Loss: 0.1173 - Accuracy: 0.9867


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 0.5847 - Val Accuracy: 0.9223


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 17/51 - Loss: 0.0937 - Accuracy: 0.9907


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 0.6486 - Val Accuracy: 0.9213


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 18/51 - Loss: 0.0874 - Accuracy: 0.9904


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 0.6121 - Val Accuracy: 0.9184


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 19/51 - Loss: 0.0802 - Accuracy: 0.9919


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 0.6337 - Val Accuracy: 0.9312


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 20/51 - Loss: 0.0744 - Accuracy: 0.9929


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 0.5660 - Val Accuracy: 0.9269


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 21/51 - Loss: 0.0651 - Accuracy: 0.9926


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 0.6546 - Val Accuracy: 0.9184


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 22/51 - Loss: 0.0584 - Accuracy: 0.9936


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 0.5592 - Val Accuracy: 0.9245


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 23/51 - Loss: 0.0658 - Accuracy: 0.9934


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 0.6737 - Val Accuracy: 0.9212


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 24/51 - Loss: 0.0632 - Accuracy: 0.9934


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 0.6373 - Val Accuracy: 0.9206


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 25/51 - Loss: 0.0543 - Accuracy: 0.9951


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 0.7323 - Val Accuracy: 0.9178


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 26/51 - Loss: 0.0590 - Accuracy: 0.9938


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 0.6653 - Val Accuracy: 0.9173


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 27/51 - Loss: 0.0540 - Accuracy: 0.9944


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 0.6700 - Val Accuracy: 0.9223


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 28/51 - Loss: 0.0598 - Accuracy: 0.9931


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 0.5972 - Val Accuracy: 0.9201


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 29/51 - Loss: 0.0508 - Accuracy: 0.9947


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 0.6715 - Val Accuracy: 0.9257


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 30/51 - Loss: 0.0401 - Accuracy: 0.9967


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 0.6011 - Val Accuracy: 0.9252


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 31/51 - Loss: 0.0440 - Accuracy: 0.9961


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 0.7528 - Val Accuracy: 0.9184


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 32/51 - Loss: 0.0486 - Accuracy: 0.9954


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 0.6282 - Val Accuracy: 0.9190


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 33/51 - Loss: 0.0442 - Accuracy: 0.9952


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 0.6390 - Val Accuracy: 0.9208


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 34/51 - Loss: 0.0552 - Accuracy: 0.9943


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 0.6086 - Val Accuracy: 0.9230


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 35/51 - Loss: 0.0379 - Accuracy: 0.9968


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 0.5857 - Val Accuracy: 0.9145


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 36/51 - Loss: 0.0416 - Accuracy: 0.9960


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 0.5997 - Val Accuracy: 0.9167


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 37/51 - Loss: 0.0489 - Accuracy: 0.9957


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 0.6091 - Val Accuracy: 0.9223


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 38/51 - Loss: 0.0469 - Accuracy: 0.9955


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 0.6404 - Val Accuracy: 0.9134


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 39/51 - Loss: 0.0532 - Accuracy: 0.9934


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 0.6255 - Val Accuracy: 0.9206


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 40/51 - Loss: 0.0496 - Accuracy: 0.9951


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 0.6377 - Val Accuracy: 0.9208


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 41/51 - Loss: 0.0353 - Accuracy: 0.9970


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 0.5746 - Val Accuracy: 0.9291


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 42/51 - Loss: 0.0576 - Accuracy: 0.9938


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 0.6821 - Val Accuracy: 0.9173


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 43/51 - Loss: 0.0479 - Accuracy: 0.9957


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 0.5649 - Val Accuracy: 0.9212


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 44/51 - Loss: 0.0409 - Accuracy: 0.9964


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 0.7454 - Val Accuracy: 0.9000


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 45/51 - Loss: 0.0399 - Accuracy: 0.9964


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 0.6499 - Val Accuracy: 0.9286


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 46/51 - Loss: 0.0485 - Accuracy: 0.9954


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 0.5885 - Val Accuracy: 0.9218


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 47/51 - Loss: 0.0416 - Accuracy: 0.9959


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 0.6673 - Val Accuracy: 0.9162


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 48/51 - Loss: 0.0471 - Accuracy: 0.9952


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 0.5686 - Val Accuracy: 0.9219


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 49/51 - Loss: 0.0370 - Accuracy: 0.9971


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 0.6981 - Val Accuracy: 0.9223


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 50/51 - Loss: 0.0279 - Accuracy: 0.9979


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 0.6837 - Val Accuracy: 0.9240


Training:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch: 51/51 - Loss: 0.0446 - Accuracy: 0.9957


  0%|          | 0/32 [00:00<?, ?it/s]

Val Loss: 0.6037 - Val Accuracy: 0.9307
Saving best model...


[32m[I 2023-11-22 23:43:07,575][0m Trial 19 finished with value: 0.9306796193122864 and parameters: {'learning_rate': 0.006331738628921463, 'momentum': 0.8283084782930007, 'weight_decay': 0.002877223544130165, 'alpha': 0.2, 'beta': 0.8, 'gamma': 0.2, 'batch_size': 56, 'epochs': 51}. Best is trial 19 with value: 0.9306796193122864.[0m


Learning rate: 0.005005576120610876
Momentum: 0.8491926016135631
Weight decay: 0.007646559795983791
Alpha: 0.4
Beta: 0.9
Gamma: 0.30000000000000004
Batch size: 49
Number of epochs: 50


Epochs:   0%|          | 0/50 [00:00<?, ?it/s]

Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 1/50 - Loss: 1.8599 - Accuracy: 0.5338


  0%|          | 0/36 [00:00<?, ?it/s]

[32m[I 2023-11-22 23:44:16,423][0m Trial 20 pruned. [0m


Val Loss: 1.7747 - Val Accuracy: 0.5347

Study statistics: 
  Number of finished trials:  21
  Number of pruned trials:  10
  Number of complete trials:  10


In [17]:
print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

Best trial:
  Value:  0.9306796193122864
  Params: 
    alpha: 0.2
    batch_size: 56
    beta: 0.8
    epochs: 51
    gamma: 0.2
    learning_rate: 0.006331738628921463
    momentum: 0.8283084782930007
    weight_decay: 0.002877223544130165


In [None]:
# base model

Best trial:
Value:  0.9346662163734436
Params: 
batch_size: 97
beta: 0.5
epochs: 95
gamma: 0.8
learning_rate: 0.004855095881098637
momentum: 0.9084985321458035
weight_decay: 0.0016962190703003097

In [None]:
# skip model

Best trial:
Value:  0.9301802515983582
Params: 
batch_size: 37
beta: 0.1
epochs: 51
gamma: 0.8
learning_rate: 0.0011662086331421003
momentum: 0.8932599890221394
weight_decay: 0.0015352786162897319

In [None]:
# skip model (alpha=0.2)

Best trial:
Value:  0.9306796193122864
Params: 
alpha: 0.2
batch_size: 56
beta: 0.8
epochs: 51
gamma: 0.2
learning_rate: 0.006331738628921463
momentum: 0.8283084782930007
weight_decay: 0.002877223544130165