In [1]:
import os
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as opt
import torch.nn.functional as F
from torchvision.io import read_image
from torchvision import transforms
from torchsummary import summary
from tqdm.notebook import tqdm
from PIL import Image
import optuna
from optuna.trial import TrialState

In [2]:
# Seed every RNG the notebook relies on, not just Python's `random`:
#   * torch  - weight initialisation and DataLoader shuffling draw from it, and
#              DataLoader derives the seeds of its worker processes from it;
#   * numpy  - not drawn from by the code in this notebook, seeded for completeness;
#   * random - used by AdienceDataset to sample positive / negative images.
# `random.seed` comes last so the cell produces no output (it returns None).
torch.manual_seed(2020)
np.random.seed(2020)
random.seed(2020)

In [3]:
# Prefer the second GPU (index 1) when the host has one.  Fall back to the first
# GPU on single-GPU hosts (where "cuda:1" is not a valid device) and to the CPU
# when CUDA is unavailable.
if torch.cuda.device_count() > 1:
    device = torch.device("cuda:1")
elif torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
device

device(type='cuda', index=1)

In [4]:
# Transform that resizes an image to 227x227.  It is currently only referenced
# from a commented-out line in AdienceDataset.__getitem__.
resize_image = transforms.Resize((227, 227))

In [5]:
def calculate_mean(dataset):
    """Compute the per-channel mean (first three channels) of a dataset's images.

    Pixel values are divided by 255 before averaging, so the dataset is expected
    to yield raw 0-255 images, i.e. a dataset built *without* a mean-subtracting
    transform.

    Args:
        dataset: object supporting ``len()`` and integer indexing whose items are
            tuples with the channel-first image tensor in position 0, e.g.
            ``(image, label)`` or ``(image, positive, negative, label)``.

    Returns:
        Float tensor of shape ``(3,)``: the average over all samples of each
        image's per-channel mean.

    Raises:
        ValueError: if the dataset is empty or an image has fewer than three
            channels.
    """
    num_samples = len(dataset)
    if num_samples == 0:
        # Avoid silently returning NaNs from a 0 / 0 division.
        raise ValueError("cannot compute the mean of an empty dataset")

    mean = torch.zeros(3)
    for i in range(num_samples):
        # Index instead of unpacking a fixed 4-tuple so that datasets yielding
        # (image, label) pairs work as well as triplet datasets.
        image = dataset[i][0]
        image = image.type(torch.FloatTensor) / 255
        if image.shape[0] < 3:
            raise ValueError(
                "expected an image with at least 3 channels, got {}".format(image.shape[0])
            )
        # Mean over the spatial dimensions of each of the first three channels.
        mean += image[:3].mean(dim=(1, 2))

    return mean / num_samples

In [6]:
def subtract_mean(image, mean):
    """Scale an image by 1/255 and subtract a per-channel mean.

    Args:
        image: channel-first image tensor (raw 0-255 values are assumed, given
            the division by 255).
        mean: indexable holding one mean value per channel for channels 0-2.

    Returns:
        A newly allocated tensor with the same size as ``image``.  Channels 0-2
        hold ``image / 255 - mean``; any further channels are left at zero.
    """
    scaled = image.type(torch.FloatTensor) / 255
    centered = torch.zeros(image.size())

    for channel in (0, 1, 2):
        centered[channel] = scaled[channel, :, :] - mean[channel]

    return centered

In [7]:
class AdienceDataset(Dataset):
    """Image dataset backed by a header-less CSV of ``(file name, label)`` rows.

    With ``train=True`` every item is a triplet plus the anchor's label,
    ``(anchor, positive, negative, label)``: the positive is a random *other*
    row with the anchor's label and the negative a random row with a different
    label.  With ``train=False`` items are ``(image, label)`` pairs.

    Sampling uses Python's ``random`` module, so it follows ``random.seed``.

    Args:
        annot_file: path of the CSV file; column 0 holds the image file name
            (joined onto ``img_dir``), column 1 the label.
        img_dir: directory containing the image files.
        train: whether to return triplets (see above).
        transform: optional callable invoked as ``transform(image, mean)`` on
            every returned image.
    """

    def __init__(self, annot_file, img_dir, train=False, transform=None):
        self.img_lbls = pd.read_csv(annot_file, header=None)
        self.img_dir = img_dir
        self.transform = transform
        self.is_train = train
        # Placeholder mean handed to `transform`; replace it via `set_mean`.
        self.mean = torch.Tensor([0., 0., 0.])

    def __len__(self):
        return len(self.img_lbls)

    def _read(self, row):
        """Load the untransformed image stored at positional row ``row``."""
        img_file = self.img_lbls.iloc[row, 0]
        return read_image(os.path.join(self.img_dir, img_file))

    def _apply_transform(self, image):
        """Apply ``self.transform`` (if any) with the dataset's current mean."""
        if self.transform:
            return self.transform(image, self.mean)
        return image

    @staticmethod
    def _draw(candidates):
        """Pick one element of a non-empty integer array uniformly at random.

        ``random.randrange(len(...))`` consumes the RNG exactly like
        ``random.choice`` does, but avoids handing a NumPy array to
        ``random.choice``, which some Python 3.11 releases reject.
        """
        return int(candidates[random.randrange(len(candidates))])

    def __getitem__(self, idx):
        image = self._read(idx)
        label = self.img_lbls.iloc[idx, 1]

        if not self.is_train:
            return self._apply_transform(image), label

        # Work with positional row numbers throughout, because rows are read
        # back with `.iloc`.
        labels = self.img_lbls.iloc[:, 1].to_numpy()
        same_label = labels == label

        positives = np.flatnonzero(same_label)
        positives = positives[positives != idx]
        if len(positives) > 0:
            pos_image = self._read(self._draw(positives))
        else:
            # The anchor is the only sample of its class: reuse it as its own
            # positive instead of failing on an empty candidate list.
            pos_image = image

        negatives = np.flatnonzero(~same_label)
        if len(negatives) == 0:
            raise IndexError(
                "no sample with a label different from {!r} to use as negative".format(label)
            )
        neg_image = self._read(self._draw(negatives))

        # Transforms run after all sampling, in anchor / positive / negative order.
        image = self._apply_transform(image)
        pos_image = self._apply_transform(pos_image)
        neg_image = self._apply_transform(neg_image)

        return image, pos_image, neg_image, label

    def set_mean(self, mean):
        """Set the mean passed as second argument to ``transform``."""
        self.mean = mean

In [8]:
# All three splits are built in triplet mode (train=True): the validation loop
# in `objective` unpacks four values per batch, just like the training loop.
# A transform-free variant for use with calculate_mean would be:
#   AdienceDataset("../train.csv", "../cropped_Adience/", train=True)
_split_options = {"train": True, "transform": subtract_mean}
train_data = AdienceDataset("../train.csv", "../cropped_Adience/", **_split_options)
test_data = AdienceDataset("../test.csv", "../cropped_Adience/", **_split_options)
val_data = AdienceDataset("../val.csv", "../cropped_Adience/", **_split_options)

In [9]:
# Per-channel mean subtracted from every image, in [0, 1] units.
# Hard-coded here; presumably the result of an earlier
# `calculate_mean(train_data)` run on untransformed images - TODO confirm.
mean = torch.tensor([0.5062, 0.3581, 0.3099], dtype=torch.float32)
mean

tensor([0.5062, 0.3581, 0.3099])

In [10]:
# Every split is normalised with the same mean.
for _split in (train_data, test_data, val_data):
    _split.set_mean(mean)

In [11]:
class TripletLoss(nn.Module):
    """Hinge-style triplet loss on squared Euclidean distances.

    For each row: ``relu(d(anchor, positive) - d(anchor, negative) + margin)``,
    averaged over all rows, where ``d`` is the squared L2 distance.
    """

    def __init__(self, margin=1.0):
        super().__init__()
        self.margin = margin

    def calc_euclidean(self, x1, x2):
        """Return the squared differences of ``x1`` and ``x2`` summed over dim 1.

        Despite the name no square root is taken.
        """
        squared_diff = torch.pow(x1 - x2, 2)
        return squared_diff.sum(1)

    def forward(self, anchor, positive, negative):
        d_pos = self.calc_euclidean(anchor, positive)
        d_neg = self.calc_euclidean(anchor, negative)
        # Zero once the negative is farther than the positive by at least `margin`.
        hinge = torch.relu(d_pos - d_neg + self.margin)
        return hinge.mean()

In [12]:
class CombinedLoss(nn.Module):
    """Weighted sum of a triplet loss and a cross-entropy classification loss.

    ``total = alpha * triplet(anchor, positive, negative)
              + beta * cross_entropy(classification_out, labels)``

    Note that ``alpha`` here is a loss weight; it is unrelated to the ``alpha``
    argument of ``Net.forward``.

    Args:
        alpha: weight of the triplet term.
        beta: weight of the classification term.
        margin: margin forwarded to ``TripletLoss`` (defaults to the previously
            hard-coded 1.0).
    """

    def __init__(self, alpha=1.0, beta=1.0, margin=1.0):
        super(CombinedLoss, self).__init__()
        self.alpha = alpha
        self.beta = beta
        self.triplet = TripletLoss(margin=margin)
        self.classification = nn.CrossEntropyLoss()

    def forward(self, anchor, positive, negative, classification_out, labels):
        """Return the combined scalar loss for one batch."""
        triplet_loss = self.triplet(anchor, positive, negative)
        classification_loss = self.classification(classification_out, labels)
        total_loss = (self.alpha * triplet_loss) + (self.beta * classification_loss)

        return total_loss

In [13]:
class Net(nn.Module):
    """Five-conv-layer CNN with alpha-blended skip connections and two outputs.

    ``forward`` returns ``(x_triplet, x_classification)``: the 512-d output of
    ``fc7`` taken *before* its ReLU / dropout, and the ``num_classes`` logits of
    ``fc8``.  ``fc6`` expects 13824 (= 384 * 6 * 6) flattened features, which is
    what a 227x227 input produces with these layers.

    Args:
        num_channels: number of input image channels.
        num_classes: number of classification logits.
    """

    def __init__(self, num_channels, num_classes):
        super().__init__()

        # Convolutions.  `conv11` / `conv33` are 1x1 projections that bring the
        # skip branch to the channel count of `conv2` / `conv4`.
        self.conv1 = nn.Conv2d(num_channels, 96, kernel_size=7, stride=4)
        self.conv11 = nn.Conv2d(96, 256, kernel_size=1)
        self.conv2 = nn.Conv2d(96, 256, kernel_size=5, padding=2)
        self.conv3 = nn.Conv2d(256, 256, kernel_size=5, padding=2)
        self.conv33 = nn.Conv2d(256, 384, kernel_size=1)
        self.conv4 = nn.Conv2d(256, 384, kernel_size=3, padding=1)
        self.conv5 = nn.Conv2d(384, 384, kernel_size=3, padding=1)

        # Parameter-free layers, shared by every stage.
        self.pool = nn.MaxPool2d(kernel_size=3, stride=2)
        self.norm = nn.LocalResponseNorm(5, alpha=0.0001, beta=0.75)

        # Fully connected head.
        self.fc6 = nn.Linear(in_features=13824, out_features=512)
        self.fc7 = nn.Linear(in_features=512, out_features=512)
        self.fc8 = nn.Linear(in_features=512, out_features=num_classes)

        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.5)

    def _blend(self, main, skip, alpha: float):
        """ReLU of the convex combination ``(1 - alpha) * main + alpha * skip``."""
        return self.relu((1 - alpha) * main + alpha * skip)

    def _pool_norm(self, x):
        """Max-pool, then local response normalisation."""
        return self.norm(self.pool(x))

    def forward(self, x, alpha: float):
        # Stage 1: conv1 -> ReLU -> pool -> norm
        x = self._pool_norm(self.relu(self.conv1(x)))

        # Stage 2: conv2 blended with a 1x1-projected skip, conv3 blended with
        # its own input.
        skip = self.relu(self.conv11(x))
        x = self._blend(self.conv2(x), skip, alpha)
        x = self._blend(self.conv3(x), x, alpha)
        x = self._pool_norm(x)

        # Stage 3: same pattern with conv4 / conv5.
        skip = self.relu(self.conv33(x))
        x = self._blend(self.conv4(x), skip, alpha)
        x = self._blend(self.conv5(x), x, alpha)
        x = self._pool_norm(x)

        # Head
        x = torch.flatten(x, 1)
        x = self.dropout(self.relu(self.fc6(x)))

        x_triplet = self.fc7(x)
        x_classification = self.fc8(self.dropout(self.relu(x_triplet)))

        return x_triplet, x_classification

In [14]:
def init_weights(m):
    """Re-initialise the weight of ``nn.Conv2d`` modules with Kaiming-normal values.

    Meant to be passed to ``Module.apply``.  Modules of any other type, and
    convolution biases, are left untouched.
    """
    if not isinstance(m, nn.Conv2d):
        return
    nn.init.kaiming_normal_(m.weight)

In [16]:
# Validation accuracy a completed trial must beat before its weights are saved.
# The hard-coded value matches the best trial value reported by the study.
best_accu = 0.9238635897636414


def _run_epoch(model, loader, criterion, alpha, optimizer=None, **progress_kwargs):
    """Run one pass over ``loader`` and return ``(mean_loss, mean_accuracy)``.

    The model is trained when ``optimizer`` is given; otherwise it is put in
    eval mode and run without gradient tracking.  Both returned values are plain
    means over the per-batch values (batches are not weighted by their size).
    ``progress_kwargs`` are forwarded to ``tqdm``.
    """
    training = optimizer is not None
    if training:
        model.train()
    else:
        model.eval()

    batch_losses = []
    batch_accus = []
    with torch.set_grad_enabled(training):
        for img, pos_img, neg_img, label in tqdm(loader, **progress_kwargs):
            img, pos_img, neg_img, label = img.to(device), pos_img.to(device), neg_img.to(device), label.to(device)

            if training:
                optimizer.zero_grad()

            # Only the anchor's logits are used for classification; the positive
            # and negative images contribute their embeddings to the triplet term.
            anchor, output = model(img, alpha)
            pos, _ = model(pos_img, alpha)
            neg, _ = model(neg_img, alpha)

            pred = torch.argmax(output, 1)
            accuracy = torch.eq(pred, label).sum() / len(img)

            loss = criterion(anchor, pos, neg, output, label)
            if training:
                loss.backward()
                optimizer.step()

            batch_accus.append(accuracy.cpu().detach().numpy())
            batch_losses.append(loss.cpu().detach().numpy())

    return np.mean(batch_losses), np.mean(batch_accus)


def objective(trial):
    """Optuna objective: train a fresh ``Net`` and return its validation accuracy.

    Samples learning rate, momentum, weight decay, the skip-blending ``alpha``,
    batch size and epoch count from ``trial``, trains with SGD on ``train_data``
    and evaluates on ``val_data`` after every epoch.  The per-epoch validation
    accuracy is reported to Optuna for pruning.  If the last epoch's validation
    accuracy beats the module-level ``best_accu``, the model's state dict is
    saved and ``best_accu`` is updated.

    Returns:
        Validation accuracy of the final epoch.

    Raises:
        optuna.exceptions.TrialPruned: when the pruner stops the trial early.
    """
    global best_accu

    model = Net(3, 2)
    model.apply(init_weights)
    model = torch.jit.script(model).to(device)

    # NOTE: the order of the suggest_* calls is kept as-is.
    lr = trial.suggest_float("learning_rate", 1e-5, 1e-2, log=True)
    mom = trial.suggest_float('momentum', 0.7, 0.99)
    wd = trial.suggest_float('weight_decay', 1e-5, 1e-1, log=True)
    optimizer = opt.SGD(model.parameters(), lr=lr, momentum=mom, weight_decay=wd)
    criterion = torch.jit.script(CombinedLoss())

    alpha = trial.suggest_float("alpha", 0.1, 0.5, step=0.1)
    batch_size = trial.suggest_int('batch_size', 10, 100)
    num_epochs = trial.suggest_int('epochs', 10, 100)

    print("Learning rate: "+ str(lr))
    print("Momentum: "+ str(mom))
    print("Weight decay: "+ str(wd))
    print("Alpha: "+ str(alpha))
    print("Batch size: "+ str(batch_size))
    print("Number of epochs: "+ str(num_epochs))

    # The loaders do not change between epochs, so build them once; a shuffling
    # DataLoader still draws a new order every time it is iterated.
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=4)
    val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False, num_workers=4)

    for epoch in tqdm(range(num_epochs), desc="Epochs"):
        train_loss, train_accu = _run_epoch(
            model, train_loader, criterion, alpha, optimizer, desc="Training", leave=False
        )
        print("Epoch: {}/{} - Loss: {:.4f} - Accuracy: {:.4f}".format(epoch+1, num_epochs, train_loss, train_accu))

        val_loss, val_accu = _run_epoch(model, val_loader, criterion, alpha)
        print("Val Loss: {:.4f} - Val Accuracy: {:.4f}".format(val_loss, val_accu))

        trial.report(val_accu, epoch)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    # Only trials that run to completion can overwrite the checkpoint, and the
    # comparison uses the final epoch's accuracy (not the best epoch's).
    if val_accu > best_accu:
        best_accu = val_accu
        print("Saving best model...")
        torch.save(model.state_dict(), "../incr_skip_triplet_00.pt")

    return val_accu

In [17]:
# Open the study persisted in study.db (creating it on first use) and run one
# more trial of `objective`.
study = optuna.create_study(
    study_name='incremented-skip-alpha-triplet-libfaceid-study',
    storage='sqlite:///study.db',
    direction='maximize',
    load_if_exists=True,
)
study.optimize(objective, n_trials=1)

pruned_trials = study.get_trials(states=[TrialState.PRUNED], deepcopy=False)
complete_trials = study.get_trials(states=[TrialState.COMPLETE], deepcopy=False)

# Summary of the study.  `study.trials` holds every trial stored in the study,
# so the first count can exceed pruned + complete.
print("\nStudy statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

[32m[I 2023-11-19 16:10:55,240][0m Using an existing study with name 'incremented-skip-alpha-triplet-libfaceid-study' instead of creating a new one.[0m


Learning rate: 0.0003351653872818826
Momentum: 0.7715726842658507
Weight decay: 0.0017397239196054414
Alpha: 0.1
Batch size: 49
Number of epochs: 81


Epochs:   0%|          | 0/81 [00:00<?, ?it/s]

Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 1/81 - Loss: 1.6994 - Accuracy: 0.5334


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 1.6915 - Val Accuracy: 0.5347


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 2/81 - Loss: 1.6940 - Accuracy: 0.5343


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 1.6894 - Val Accuracy: 0.5347


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 3/81 - Loss: 1.6907 - Accuracy: 0.5347


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 1.6874 - Val Accuracy: 0.5347


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 4/81 - Loss: 1.6865 - Accuracy: 0.5360


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 1.6846 - Val Accuracy: 0.5347


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 5/81 - Loss: 1.6798 - Accuracy: 0.5375


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 1.6775 - Val Accuracy: 0.5347


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 6/81 - Loss: 1.6708 - Accuracy: 0.5404


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 1.6631 - Val Accuracy: 0.5347


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 7/81 - Loss: 1.6505 - Accuracy: 0.5461


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 1.6335 - Val Accuracy: 0.5375


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 8/81 - Loss: 1.6335 - Accuracy: 0.5601


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 1.5895 - Val Accuracy: 0.5464


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 9/81 - Loss: 1.5993 - Accuracy: 0.5864


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 1.5626 - Val Accuracy: 0.6644


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 10/81 - Loss: 1.5812 - Accuracy: 0.6120


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 1.5151 - Val Accuracy: 0.6118


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 11/81 - Loss: 1.5292 - Accuracy: 0.6559


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 1.4359 - Val Accuracy: 0.7419


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 12/81 - Loss: 1.4938 - Accuracy: 0.6999


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 1.4078 - Val Accuracy: 0.7402


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 13/81 - Loss: 1.4357 - Accuracy: 0.7419


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 1.3641 - Val Accuracy: 0.7908


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 14/81 - Loss: 1.3897 - Accuracy: 0.7671


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 1.2770 - Val Accuracy: 0.7996


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 15/81 - Loss: 1.3380 - Accuracy: 0.7875


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 1.2643 - Val Accuracy: 0.8101


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 16/81 - Loss: 1.2820 - Accuracy: 0.8065


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 1.2184 - Val Accuracy: 0.8187


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 17/81 - Loss: 1.2229 - Accuracy: 0.8250


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 1.1895 - Val Accuracy: 0.8240


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 18/81 - Loss: 1.1602 - Accuracy: 0.8377


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 1.1090 - Val Accuracy: 0.8322


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 19/81 - Loss: 1.0920 - Accuracy: 0.8498


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 1.0848 - Val Accuracy: 0.8421


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 20/81 - Loss: 1.0365 - Accuracy: 0.8613


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 1.0473 - Val Accuracy: 0.8468


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 21/81 - Loss: 0.9730 - Accuracy: 0.8740


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 1.0289 - Val Accuracy: 0.8519


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 22/81 - Loss: 0.9207 - Accuracy: 0.8788


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.9550 - Val Accuracy: 0.8650


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 23/81 - Loss: 0.8374 - Accuracy: 0.8965


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.9150 - Val Accuracy: 0.8574


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 24/81 - Loss: 0.7948 - Accuracy: 0.9022


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.8726 - Val Accuracy: 0.8741


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 25/81 - Loss: 0.7139 - Accuracy: 0.9136


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.8780 - Val Accuracy: 0.8733


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 26/81 - Loss: 0.6627 - Accuracy: 0.9247


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.8732 - Val Accuracy: 0.8752


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 27/81 - Loss: 0.6142 - Accuracy: 0.9308


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.7820 - Val Accuracy: 0.8812


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 28/81 - Loss: 0.5531 - Accuracy: 0.9397


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.8197 - Val Accuracy: 0.8750


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 29/81 - Loss: 0.5065 - Accuracy: 0.9465


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.7746 - Val Accuracy: 0.8894


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 30/81 - Loss: 0.4606 - Accuracy: 0.9532


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.7868 - Val Accuracy: 0.8744


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 31/81 - Loss: 0.4140 - Accuracy: 0.9615


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.7581 - Val Accuracy: 0.8948


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 32/81 - Loss: 0.3719 - Accuracy: 0.9648


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.7872 - Val Accuracy: 0.8794


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 33/81 - Loss: 0.3498 - Accuracy: 0.9676


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.7394 - Val Accuracy: 0.8841


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 34/81 - Loss: 0.3088 - Accuracy: 0.9717


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.7383 - Val Accuracy: 0.8712


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 35/81 - Loss: 0.2932 - Accuracy: 0.9744


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.6828 - Val Accuracy: 0.9050


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 36/81 - Loss: 0.2581 - Accuracy: 0.9782


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.7610 - Val Accuracy: 0.9003


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 37/81 - Loss: 0.2489 - Accuracy: 0.9787


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.6862 - Val Accuracy: 0.8982


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 38/81 - Loss: 0.2203 - Accuracy: 0.9838


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.7846 - Val Accuracy: 0.8923


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 39/81 - Loss: 0.2175 - Accuracy: 0.9854


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.6727 - Val Accuracy: 0.8903


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 40/81 - Loss: 0.1911 - Accuracy: 0.9858


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.8363 - Val Accuracy: 0.8909


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 41/81 - Loss: 0.1673 - Accuracy: 0.9873


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.7825 - Val Accuracy: 0.9128


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 42/81 - Loss: 0.1729 - Accuracy: 0.9882


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.8107 - Val Accuracy: 0.8956


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 43/81 - Loss: 0.1471 - Accuracy: 0.9916


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.7387 - Val Accuracy: 0.9048


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 44/81 - Loss: 0.1444 - Accuracy: 0.9919


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.7649 - Val Accuracy: 0.9122


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 45/81 - Loss: 0.1235 - Accuracy: 0.9926


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.8185 - Val Accuracy: 0.9031


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 46/81 - Loss: 0.1228 - Accuracy: 0.9918


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.7711 - Val Accuracy: 0.9031


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 47/81 - Loss: 0.1196 - Accuracy: 0.9920


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.7326 - Val Accuracy: 0.9071


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 48/81 - Loss: 0.1065 - Accuracy: 0.9944


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.9341 - Val Accuracy: 0.8719


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 49/81 - Loss: 0.1147 - Accuracy: 0.9929


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.7336 - Val Accuracy: 0.9099


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 50/81 - Loss: 0.0996 - Accuracy: 0.9954


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.7405 - Val Accuracy: 0.9133


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 51/81 - Loss: 0.0908 - Accuracy: 0.9956


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.7950 - Val Accuracy: 0.9075


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 52/81 - Loss: 0.1000 - Accuracy: 0.9947


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.6993 - Val Accuracy: 0.9130


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 53/81 - Loss: 0.0844 - Accuracy: 0.9949


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.7195 - Val Accuracy: 0.9094


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 54/81 - Loss: 0.0792 - Accuracy: 0.9958


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.9353 - Val Accuracy: 0.8946


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 55/81 - Loss: 0.0727 - Accuracy: 0.9962


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.8300 - Val Accuracy: 0.9079


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 56/81 - Loss: 0.0836 - Accuracy: 0.9967


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.7767 - Val Accuracy: 0.9035


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 57/81 - Loss: 0.0636 - Accuracy: 0.9976


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.8631 - Val Accuracy: 0.9062


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 58/81 - Loss: 0.0703 - Accuracy: 0.9958


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.7604 - Val Accuracy: 0.9158


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 59/81 - Loss: 0.0656 - Accuracy: 0.9961


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.8899 - Val Accuracy: 0.9018


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 60/81 - Loss: 0.0567 - Accuracy: 0.9975


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.8094 - Val Accuracy: 0.9084


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 61/81 - Loss: 0.0671 - Accuracy: 0.9962


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.8015 - Val Accuracy: 0.9128


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 62/81 - Loss: 0.0604 - Accuracy: 0.9970


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.8590 - Val Accuracy: 0.9028


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 63/81 - Loss: 0.0544 - Accuracy: 0.9981


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.9022 - Val Accuracy: 0.9062


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 64/81 - Loss: 0.0513 - Accuracy: 0.9979


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.8632 - Val Accuracy: 0.9096


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 65/81 - Loss: 0.0555 - Accuracy: 0.9980


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.8574 - Val Accuracy: 0.8954


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 66/81 - Loss: 0.0619 - Accuracy: 0.9965


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.8990 - Val Accuracy: 0.8954


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 67/81 - Loss: 0.0538 - Accuracy: 0.9977


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.8513 - Val Accuracy: 0.9065


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 68/81 - Loss: 0.0534 - Accuracy: 0.9984


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.8770 - Val Accuracy: 0.9073


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 69/81 - Loss: 0.0440 - Accuracy: 0.9987


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.8429 - Val Accuracy: 0.9056


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 70/81 - Loss: 0.0465 - Accuracy: 0.9983


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.8983 - Val Accuracy: 0.9071


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 71/81 - Loss: 0.0405 - Accuracy: 0.9982


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.8485 - Val Accuracy: 0.9099


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 72/81 - Loss: 0.0403 - Accuracy: 0.9984


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.8645 - Val Accuracy: 0.9122


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 73/81 - Loss: 0.0313 - Accuracy: 0.9994


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.9143 - Val Accuracy: 0.9128


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 74/81 - Loss: 0.0384 - Accuracy: 0.9988


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.8617 - Val Accuracy: 0.9139


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 75/81 - Loss: 0.0409 - Accuracy: 0.9991


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.9162 - Val Accuracy: 0.9111


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 76/81 - Loss: 0.0381 - Accuracy: 0.9985


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.9073 - Val Accuracy: 0.9054


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 77/81 - Loss: 0.0423 - Accuracy: 0.9985


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.8342 - Val Accuracy: 0.9073


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 78/81 - Loss: 0.0376 - Accuracy: 0.9984


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.8396 - Val Accuracy: 0.9124


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 79/81 - Loss: 0.0378 - Accuracy: 0.9989


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.8605 - Val Accuracy: 0.9099


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 80/81 - Loss: 0.0390 - Accuracy: 0.9985


  0%|          | 0/36 [00:00<?, ?it/s]

Val Loss: 0.9670 - Val Accuracy: 0.9096


Training:   0%|          | 0/286 [00:00<?, ?it/s]

Epoch: 81/81 - Loss: 0.0357 - Accuracy: 0.9987


  0%|          | 0/36 [00:00<?, ?it/s]

[32m[I 2023-11-19 17:46:42,211][0m Trial 5 finished with value: 0.9082199931144714 and parameters: {'learning_rate': 0.0003351653872818826, 'momentum': 0.7715726842658507, 'weight_decay': 0.0017397239196054414, 'alpha': 0.1, 'batch_size': 49, 'epochs': 81}. Best is trial 3 with value: 0.9238635897636414.[0m


Val Loss: 0.8156 - Val Accuracy: 0.9082

Study statistics: 
  Number of finished trials:  6
  Number of pruned trials:  0
  Number of complete trials:  5


In [18]:
# Re-open the same persisted study and run five further trials of `objective`.
study = optuna.create_study(
    study_name='incremented-skip-alpha-triplet-libfaceid-study',
    storage='sqlite:///study.db',
    direction='maximize',
    load_if_exists=True,
)
study.optimize(objective, n_trials=5)

pruned_trials = study.get_trials(states=[TrialState.PRUNED], deepcopy=False)
complete_trials = study.get_trials(states=[TrialState.COMPLETE], deepcopy=False)

# Summary of the study.  `study.trials` holds every trial stored in the study,
# so the first count can exceed pruned + complete.
print("\nStudy statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

[32m[I 2023-11-19 17:46:42,302][0m Using an existing study with name 'incremented-skip-alpha-triplet-libfaceid-study' instead of creating a new one.[0m


Learning rate: 0.009585726485878144
Momentum: 0.7294728974244998
Weight decay: 0.007569151600620028
Alpha: 0.2
Batch size: 18
Number of epochs: 62


Epochs:   0%|          | 0/62 [00:00<?, ?it/s]

Training:   0%|          | 0/777 [00:00<?, ?it/s]

Epoch: 1/62 - Loss: 1.6979 - Accuracy: 0.5351


  0%|          | 0/97 [00:00<?, ?it/s]

Val Loss: 1.6901 - Val Accuracy: 0.5357


Training:   0%|          | 0/777 [00:00<?, ?it/s]

Epoch: 2/62 - Loss: 1.6905 - Accuracy: 0.5362


  0%|          | 0/97 [00:00<?, ?it/s]

Val Loss: 1.6902 - Val Accuracy: 0.5357


Training:   0%|          | 0/777 [00:00<?, ?it/s]

Epoch: 3/62 - Loss: 1.6906 - Accuracy: 0.5359


  0%|          | 0/97 [00:00<?, ?it/s]

Val Loss: 1.6905 - Val Accuracy: 0.5357


Training:   0%|          | 0/777 [00:00<?, ?it/s]

Epoch: 4/62 - Loss: 1.6908 - Accuracy: 0.5355


  0%|          | 0/97 [00:00<?, ?it/s]

Val Loss: 1.6906 - Val Accuracy: 0.5357


Training:   0%|          | 0/777 [00:00<?, ?it/s]

Epoch: 5/62 - Loss: 1.6909 - Accuracy: 0.5358


  0%|          | 0/97 [00:00<?, ?it/s]

Val Loss: 1.6908 - Val Accuracy: 0.5357


Training:   0%|          | 0/777 [00:00<?, ?it/s]

Epoch: 6/62 - Loss: 1.6909 - Accuracy: 0.5360


  0%|          | 0/97 [00:00<?, ?it/s]

Val Loss: 1.6909 - Val Accuracy: 0.5357


Training:   0%|          | 0/777 [00:00<?, ?it/s]

Epoch: 7/62 - Loss: 1.6909 - Accuracy: 0.5356


  0%|          | 0/97 [00:00<?, ?it/s]

[32m[I 2023-11-19 17:57:17,344][0m Trial 6 pruned. [0m


Val Loss: 1.6908 - Val Accuracy: 0.5357
Learning rate: 1.7152071210749663e-05
Momentum: 0.8834025374834498
Weight decay: 0.0024137262320541334
Alpha: 0.2
Batch size: 54
Number of epochs: 100


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

Training:   0%|          | 0/259 [00:00<?, ?it/s]

Epoch: 1/100 - Loss: 1.7057 - Accuracy: 0.5292


  0%|          | 0/33 [00:00<?, ?it/s]

[32m[I 2023-11-19 17:58:30,086][0m Trial 7 pruned. [0m


Val Loss: 1.6894 - Val Accuracy: 0.5320
Learning rate: 3.787145251755529e-05
Momentum: 0.9370912072383404
Weight decay: 0.00010782294050508204
Alpha: 0.30000000000000004
Batch size: 48
Number of epochs: 38


Epochs:   0%|          | 0/38 [00:00<?, ?it/s]

Training:   0%|          | 0/292 [00:00<?, ?it/s]

Epoch: 1/38 - Loss: 1.6919 - Accuracy: 0.5232


  0%|          | 0/37 [00:00<?, ?it/s]

[32m[I 2023-11-19 17:59:47,172][0m Trial 8 pruned. [0m


Val Loss: 1.6886 - Val Accuracy: 0.5326
Learning rate: 3.6072814751439224e-05
Momentum: 0.9003306798523888
Weight decay: 0.00015459549062501364
Alpha: 0.5
Batch size: 35
Number of epochs: 70


Epochs:   0%|          | 0/70 [00:00<?, ?it/s]

Training:   0%|          | 0/400 [00:00<?, ?it/s]

Epoch: 1/70 - Loss: 1.6993 - Accuracy: 0.4814


  0%|          | 0/50 [00:00<?, ?it/s]

Val Loss: 1.6910 - Val Accuracy: 0.5705


Training:   0%|          | 0/400 [00:00<?, ?it/s]

Epoch: 2/70 - Loss: 1.6906 - Accuracy: 0.5248


  0%|          | 0/50 [00:00<?, ?it/s]

Val Loss: 1.6885 - Val Accuracy: 0.5360


Training:   0%|          | 0/400 [00:00<?, ?it/s]

Epoch: 3/70 - Loss: 1.6884 - Accuracy: 0.5437


  0%|          | 0/50 [00:00<?, ?it/s]

Val Loss: 1.6882 - Val Accuracy: 0.5355


Training:   0%|          | 0/400 [00:00<?, ?it/s]

Epoch: 4/70 - Loss: 1.6889 - Accuracy: 0.5449


  0%|          | 0/50 [00:00<?, ?it/s]

Val Loss: 1.6896 - Val Accuracy: 0.5355


Training:   0%|          | 0/400 [00:00<?, ?it/s]

Epoch: 5/70 - Loss: 1.6876 - Accuracy: 0.5404


  0%|          | 0/50 [00:00<?, ?it/s]

Val Loss: 1.6868 - Val Accuracy: 0.5355


Training:   0%|          | 0/400 [00:00<?, ?it/s]

Epoch: 6/70 - Loss: 1.6886 - Accuracy: 0.5485


  0%|          | 0/50 [00:00<?, ?it/s]

Val Loss: 1.6848 - Val Accuracy: 0.5361


Training:   0%|          | 0/400 [00:00<?, ?it/s]

Epoch: 7/70 - Loss: 1.6844 - Accuracy: 0.5566


  0%|          | 0/50 [00:00<?, ?it/s]

Val Loss: 1.6826 - Val Accuracy: 0.5435


Training:   0%|          | 0/400 [00:00<?, ?it/s]

Epoch: 8/70 - Loss: 1.6818 - Accuracy: 0.5536


  0%|          | 0/50 [00:00<?, ?it/s]

Val Loss: 1.6802 - Val Accuracy: 0.5367


Training:   0%|          | 0/400 [00:00<?, ?it/s]

Epoch: 9/70 - Loss: 1.6832 - Accuracy: 0.5560


  0%|          | 0/50 [00:00<?, ?it/s]

[32m[I 2023-11-19 18:12:00,029][0m Trial 9 pruned. [0m


Val Loss: 1.6797 - Val Accuracy: 0.5384
Learning rate: 1.4316677914464993e-05
Momentum: 0.7234860285349198
Weight decay: 0.0004785714287947506
Alpha: 0.4
Batch size: 27
Number of epochs: 50


Epochs:   0%|          | 0/50 [00:00<?, ?it/s]

Training:   0%|          | 0/518 [00:00<?, ?it/s]

Epoch: 1/50 - Loss: 1.7067 - Accuracy: 0.4745


  0%|          | 0/65 [00:00<?, ?it/s]

[32m[I 2023-11-19 18:13:23,535][0m Trial 10 pruned. [0m


Val Loss: 1.6892 - Val Accuracy: 0.4658

Study statistics: 
  Number of finished trials:  11
  Number of pruned trials:  5
  Number of complete trials:  5


In [19]:
# Report the best trial stored in the study: its objective value followed by
# one line per sampled hyper-parameter.
print("Best trial:")
trial = study.best_trial
print("  Value: ", trial.value)
print("  Params: ")
for param_name, param_value in trial.params.items():
    print("    {}: {}".format(param_name, param_value))

Best trial:
  Value:  0.9238635897636414
  Params: 
    alpha: 0.1
    batch_size: 22
    learning_rate: 0.00027039716003689977
    momentum: 0.8456179921920867
    weight_decay: 1.3141490080199303e-05


In [None]:
# base model

Best trial:
Value:  0.9344444870948792
Params: 
batch_size: 60
learning_rate: 0.009437359045100662
momentum: 0.7021887822156345
weight_decay: 5.982011475092227e-05

In [None]:
# skip model

Best trial:
Value:  0.9204047918319702
Params: 
batch_size: 39
learning_rate: 0.0003622895790391199
momentum: 0.8296680142263324
weight_decay: 0.004568653389880783

In [None]:
# skip model (alpha=0.1)

Best trial:
Value:  0.9238635897636414
Params:
batch_size: 22
learning_rate: 0.00027039716003689977
momentum: 0.8456179921920867
weight_decay: 1.3141490080199303e-05