In [19]:
from collections import OrderedDict
from math import ceil, floor
import itertools
import datetime
import os
import os.path as osp
import tarfile
import urllib.request

from torchvision import datasets, transforms
import numpy as np
import torch.nn as nn
import torch.utils.data as data
import torch
import torchfile
from tqdm import tqdm
import matplotlib.pyplot as plt
from sklearn import svm, metrics, model_selection
from sklearn.svm import SVC
from sklearn.model_selection import KFold, cross_validate

In [7]:
class VGGFace(nn.Module):
    def __init__(self):
        super(VGGFace, self).__init__()

        self.features = nn.ModuleDict(OrderedDict(
            {
                # === Block 1 ===
                'conv_1_1': nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, padding=1),
                'relu_1_1': nn.ReLU(inplace=True),
                'conv_1_2': nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1),
                'relu_1_2': nn.ReLU(inplace=True),
                'maxp_1_2': nn.MaxPool2d(kernel_size=2, stride=2),
                # === Block 2 ===
                'conv_2_1': nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
                'relu_2_1': nn.ReLU(inplace=True),
                'conv_2_2': nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1),
                'relu_2_2': nn.ReLU(inplace=True),
                'maxp_2_2': nn.MaxPool2d(kernel_size=2, stride=2),
                # === Block 3 ===
                'conv_3_1': nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1),
                'relu_3_1': nn.ReLU(inplace=True),
                'conv_3_2': nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1),
                'relu_3_2': nn.ReLU(inplace=True),
                'conv_3_3': nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1),
                'relu_3_3': nn.ReLU(inplace=True),
                'maxp_3_3': nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True),
                # === Block 4 ===
                'conv_4_1': nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, padding=1),
                'relu_4_1': nn.ReLU(inplace=True),
                'conv_4_2': nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
                'relu_4_2': nn.ReLU(inplace=True),
                'conv_4_3': nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
                'relu_4_3': nn.ReLU(inplace=True),
                'maxp_4_3': nn.MaxPool2d(kernel_size=2, stride=2),
                # === Block 5 ===
                'conv_5_1': nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
                'relu_5_1': nn.ReLU(inplace=True),
                'conv_5_2': nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
                'relu_5_2': nn.ReLU(inplace=True),
                'conv_5_3': nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
                'relu_5_3': nn.ReLU(inplace=True),
                'maxp_5_3': nn.MaxPool2d(kernel_size=2, stride=2)
            }))

        self.fc = nn.ModuleDict(OrderedDict(
            {
                'fc6': nn.Linear(in_features=512 * 7 * 7, out_features=4096),
                'fc6-relu': nn.ReLU(inplace=True),
                'fc6-dropout': nn.Dropout(p=0.5),
                'fc7': nn.Linear(in_features=4096, out_features=4096),
                'fc7-relu': nn.ReLU(inplace=True),
                'fc7-dropout': nn.Dropout(p=0.5),
                'fc8': nn.Linear(in_features=4096, out_features=2622),
            }))

    def forward(self, x):
        # Forward through feature layers
        for k, layer in self.features.items():
            x = layer(x)

        # Flatten convolution outputs
        x = x.view(x.size(0), -1)

        # Forward through FC layers
        for k, layer in self.fc.items():
            x = layer(x)

        return x


# VGGFace training

## 1. Create embeddings for SVM

In [8]:
def create_embeddings(model, dataloader: data.DataLoader, path_embed: str, path_labels: str):
    """ Creates embeddings and labels to files

    Args:
        model: Neural model
        dataloader: Dataloader
        path_embed: Path to embeddings to save
        path_labels: Path to labels to save
    """
    embeddings = []
    labels = []
    for batch in tqdm(dataloader):
        data, label = batch
        embeddings.append(*model(data).tolist())
        labels.append(*label.tolist())
    np.save(path_embed, embeddings)
    np.save(path_labels, labels)

In [9]:
def get_feature_extracting_model():
    """ Get feature-extracting model

    Returns:
        Feature-extracting model
    """
    # Build VGGFace model and load pre-trained weights
    model = VGGFace()
    model_dict = torch.load('models/vggface.pth', map_location=lambda storage, loc: storage)
    model.load_state_dict(model_dict)
    model.eval()

    # Fine-tuning
    model.fc['fc7-relu'] = nn.Identity()
    model.fc['fc7-dropout'] = nn.Identity()
    model.fc['fc8'] = nn.Identity()

    # Freezing weights
    for param in model.parameters():
        param.requires_grad = False

    return model

In [15]:
# Create embeddings
model = get_feature_extracting_model()
print(model)
print('Getting dataloaders...')
# Dataset must be located in folder "dataset"
dataset = datasets.ImageFolder('dataset', transform=transforms.Compose([transforms.ToTensor()]))
dataloader = data.DataLoader(dataset, batch_size=1, shuffle=False)
print('Done!')

print('Create embeddings...')
model.eval()
# Embeddings and labels will be stored in folder "embeds"
create_embeddings(model, dataloader, 'embeds/embeds.npy', 'embeds/labels.npy')
print('Done!')

VGGFace(
  (features): ModuleDict(
    (conv_1_1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu_1_1): ReLU(inplace=True)
    (conv_1_2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu_1_2): ReLU(inplace=True)
    (maxp_1_2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv_2_1): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu_2_1): ReLU(inplace=True)
    (conv_2_2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu_2_2): ReLU(inplace=True)
    (maxp_2_2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv_3_1): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu_3_1): ReLU(inplace=True)
    (conv_3_2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu_3_2): ReLU(inplace=True)
    (conv_3_3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding

100%|██████████| 926/926 [04:37<00:00,  3.33it/s]


Done!


## 3. Training a SVM classifier and calculate the metrics

In [None]:
# Create train and test samples
embeds = np.load('embeds/embeds.npy')
labels = np.load('embeds/labels.npy')
X_train, X_test, y_train, y_test = model_selection.train_test_split(embeds, labels, test_size=0.2, random_state=42)

# Create SVM classifier
classifier = svm.SVC(C=1, probability=True)
classifier.fit(X_train, y_train)

# Get predictions
pred = classifier.predict(X_test)
proba = classifier.predict_proba(X_test)
print('Predictions:')
print(pred)
print('Ground truth:')
print(y_test)

# Show metrics
fpr, tpr, _ = metrics.roc_curve(y_test, proba[:,0], pos_label=0)
roc_auc = metrics.auc(fpr, tpr)
print('roc_auc: ', roc_auc)
acc = metrics.accuracy_score(y_test, pred)
print('acc: ', acc)
f1 = metrics.f1_score(y_test, pred, average='binary')
print('f1: ', f1)

# Plot ROC-AUC curve
plt.style.use('seaborn')
plt.title('Receiver Operating Characteristic')
plt.plot(fpr, tpr, 'b', label='AUC = %0.2f' % roc_auc)
plt.legend(loc='lower right')
plt.plot([0, 1], [0, 1], 'r--')
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()

## 4. Fine-tuning VGGFace

In [10]:
# Define the device where we will train on
DEVICE = 'cuda:0' if torch.cuda.is_available() else 'cpu'
print(DEVICE)

cuda:0


In [11]:
def save_checkpoint(model: nn.Module, optimizer: torch.optim.Optimizer,
                    epoch: int, history: list, best_val_acc: float):
    """ Saves checkpoint to .pth file in /checkpoints directory

    Args:
        model: Neural model
        optimizer: Optimizer (Exmpl: Adam)
        epoch: Epochs amount
        history: List of model metrics
        best_val_acc: Best accuracy on validation set
    """
    dt = datetime.datetime.now()
    dt_string = dt.strftime("%d-%m-%Y %H-%M")
    pathname_pattern = f"checkpoints/{dt_string} {epoch}-epoch.pth"
    with open(pathname_pattern, 'wb') as f:
        torch.save({
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'epoch': epoch,
            'history': history,
            'best_val_acc': best_val_acc
        }, f)


def load_checkpoint(path: str, model: nn.Module, optimizer: torch.optim.Optimizer) -> tuple[int, list, float]:
    """ Loads checkpoint from .pth file. Configures model and optimizer

    Args:
        path: Path to checkpoint
        model: Neural model (Same that used in saved checkpoint)
        optimizer: Optimizer (Same that used in saved checkpoint)

    Returns:
        List of number of epochs, history, best validation accuracy
    """
    with open(path, 'rb') as f:
        checkpoint = torch.load(f)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    epoch = checkpoint['epoch'] + 1
    history = checkpoint['history']
    best_val_acc = checkpoint['best_val_acc']
    return epoch, history, best_val_acc


def epoch_fit(model: torch.nn.Module,
              train_dataloader: data.DataLoader,
              optimizer: torch.optim.Optimizer,
              criterion: torch.nn.CrossEntropyLoss) -> tuple[float, float]:
    """ Training model on 1 epoch

    Args:
        model: Neural model
        train_dataloader: Train dataloader
        optimizer: Optimizer
        criterion: Loss function

    Returns:
        List of epoch loss, epoch accuracy
    """
    # Switch model to train mode
    model.train()

    epoch_loss = 0.0
    epoch_acc = 0.0
    num_of_batches = len(train_dataloader)
    num_of_elems = len(train_dataloader.dataset)

    for inputs, labels in train_dataloader:
        # Send tensors to DEVICE
        inputs = inputs.to(DEVICE)
        labels = labels.to(DEVICE)

        # Reset gradients to zero
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)

        # Backward pass
        loss = criterion(outputs, labels)
        loss.backward()

        # Gradient step
        optimizer.step()

        # Accumulate loss and accuracy
        epoch_loss += loss.item()
        preds = torch.argmax(outputs, 1)
        epoch_acc += sum(preds == labels.data)

    # Calculate average values
    epoch_loss /= num_of_batches
    epoch_acc /= num_of_elems
    return epoch_loss, epoch_acc.cpu()


def epoch_eval(model: nn.Module,
               val_dataloader: data.DataLoader,
               criterion: torch.nn.CrossEntropyLoss) -> tuple[float, float]:
    """ Validate model on 1 epoch

    Args:
        model: Neural model
        val_dataloader: Validation dataloader
        criterion: Loss function

    Returns:
        List of validation loss, validation accuracy
    """
    # Switch model to evaluation mode
    model.eval()

    val_loss = 0.0
    val_acc = 0.0
    num_of_batches = len(val_dataloader)
    num_of_elems = len(val_dataloader.dataset)

    for inputs, labels in val_dataloader:
        # Send tensors to DEVICE
        inputs = inputs.to(DEVICE)
        labels = labels.to(DEVICE)

        # Getting model outputs without calculating gradients
        with torch.no_grad():
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Accumulate validation loss and accuracy
            val_loss += loss.item()
            preds = torch.argmax(outputs, 1)
            val_acc += torch.sum(preds == labels.data)

    # Calculate average values
    val_loss /= num_of_batches
    val_acc /= num_of_elems
    return val_loss, val_acc.cpu()


def train(model: nn.Module,
          train_dataloader: data.DataLoader,
          val_dataloader: data.DataLoader,
          epochs: int,
          checkpoint_path: str = None) -> list:
    """ Train loop
        Saves weights of best model in "models/best_model.pth"
        
    Args:
        model: Neural model
        train_dataloader: Train dataloader
        val_dataloader: Validation dataloader
        epochs: Number of epochs
        checkpoint_path: Path to checkpoint to continue from

    Returns:
        Matrix of metrics and loss on train and val sets for each epoch: [train_loss, train_acc, val_loss, val_acc]
    """
    # Switch model to train mode
    model.train()

    # Define training parameters
    optimizer = torch.optim.Adam(model.parameters(), weight_decay=0.01, amsgrad=True)
    criterion = torch.nn.CrossEntropyLoss()
    history = []
    start_epoch = 0
    best_val_acc = 0.0
    log_template = "\nEpoch {ep:03d} train_loss: {t_loss:0.4f} \
                    val_loss {v_loss:0.4f} train_acc {t_acc:0.4f} val_acc {v_acc:0.4f}"

    # Load checkpoint
    if checkpoint_path is not None:
        start_epoch, history, best_val_acc = load_checkpoint(checkpoint_path, model, optimizer)
        print(f'Checkpoint {checkpoint_path} loaded!')

    # Train loop
    for epoch in tqdm(range(start_epoch, epochs), desc='Epoch', position=0, leave=True):
        # Training
        train_loss, train_acc = epoch_fit(model, train_dataloader, optimizer, criterion)
        # Validation
        val_loss, val_acc = epoch_eval(model, val_dataloader, criterion)
        history.append((train_loss, train_acc, val_loss, val_acc))

        # Create checkpoint every 5 epochs
        if epoch % 5 == 0:
            save_checkpoint(model, optimizer, epoch, history, best_val_acc)

        # Saves best model
        if best_val_acc < val_acc:
            torch.save(model.state_dict(), "models/best_model.pth")
            best_val_acc = val_acc

        tqdm.write(log_template.format(ep=epoch, t_loss=train_loss,
                                       v_loss=val_loss, t_acc=train_acc, v_acc=val_acc))
    return history


def test(model: nn.Module, test_dataloader: data.DataLoader, with_plot: bool = True) -> tuple[float, float, float]:
    """ Testing model

    Args:
        model: Neural model
        test_dataloader: Test dataloader
        with_plot: True - plot a ROC-AUC curve

    Returns:
        List of metrics: [roc_auc, acc, f1]
    """
    # Switch model to evaluating mode
    model.eval()
    y_true = []

    # Test loop
    with torch.no_grad():
        logits = []
        pred_labels = []
        for inputs, labels in tqdm(test_dataloader):
            # Send tensors to DEVICE
            inputs = inputs.to(DEVICE)
            outputs = model(inputs).cpu()

            # Accumulate logits
            logits.append(outputs)
            y_true = list(itertools.chain(y_true, labels))
            pred_labels.append(*torch.argmax(outputs, dim=1).data)

    # Get model probability predictions
    probs = nn.functional.softmax(torch.cat(logits), dim=-1).numpy()
    pred_probs = probs[:, 0]

    # Getting metrics
    fpr, tpr, _ = metrics.roc_curve(y_true, pred_probs, pos_label=0)
    roc_auc = metrics.auc(fpr, tpr)
    acc = metrics.accuracy_score(y_true, pred_labels)
    f1 = metrics.f1_score(y_true, pred_labels, average='binary')

    # Plot ROC-AUC curve
    if with_plot:
        plt.style.use('seaborn')
        plt.title('Receiver Operating Characteristic')
        plt.plot(fpr, tpr, 'b', label='AUC = %0.2f' % roc_auc)
        plt.legend(loc='lower right')
        plt.plot([0, 1], [0, 1], 'r--')
        plt.xlim([0, 1])
        plt.ylim([0, 1])
        plt.ylabel('True Positive Rate')
        plt.xlabel('False Positive Rate')
        plt.show()

    return roc_auc, acc, f1

In [12]:
def get_fine_tuned_model():
    """ Get fine-tuned model

    Returns:
        Pretrained fine-tuned model
    """
    # Build VGGFace model and load pre-trained weights
    model = VGGFace()
    model_dict = torch.load('models/vggface.pth', map_location=lambda storage, loc: storage)
    model.load_state_dict(model_dict)
    model.eval()

    # Freezing weights
    for param in model.parameters():
        param.requires_grad = False

    # Fine-tuning
    model.fc['fc8'] = nn.Linear(in_features=4096, out_features=2)
    return model


def get_datasets(path: str, transform: transforms = None, with_val: bool = False, seed: int = None):
    """ Get train, test and val dataset

        Args:
            path: Path to dataset
            transform: Dataset transformation (augmentation)
            with_val: Divide dataset into train, test, val with "True" value.
                      Otherwise return only train and test
            seed: Seed for splitting

        Returns:
            Divided dataset into train, test, (val)
    """
    general_dataset = datasets.ImageFolder(path, transform=transform)
    dataset_size = len(general_dataset)

    if seed is not None:
        torch.manual_seed(seed)

    if with_val: # Train, test, val
        train_dataset, test_val_dataset = data.random_split(general_dataset,
                                                        (ceil(dataset_size * 0.6), floor(dataset_size * 0.4)))
        test_val_size = len(test_val_dataset)
        test_dataset, val_dataset = data.random_split(test_val_dataset, (ceil(test_val_size * 0.5),
                                                                        floor(test_val_size * 0.5)))
        return train_dataset, test_dataset, val_dataset
    else:   # Train, test
        train_dataset, test_dataset = data.random_split(general_dataset,
                                                        (ceil(dataset_size * 0.8), floor(dataset_size * 0.2)))
        return train_dataset, test_dataset

    
def history_plot(history: list):
    """ Plots a history graph

    Args:
        history: List of values consisted of loss, accuracy
                 on train and val samples
    """
    loss, acc, val_loss, val_acc = zip(*history)

    plt.style.use('seaborn')
    fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, sharex=True)

    ax1.plot(loss, label="train loss")
    ax1.plot(val_loss, label="val loss")
    ax1.legend()
    ax1.set_ylabel('Loss')

    ax2.plot(acc, label='train accuracy')
    ax2.plot(val_acc, label='val accuracy')
    ax2.legend()
    ax2.set_xlabel('Epochs')
    ax2.set_ylabel('Accuracy')

    plt.tight_layout()
    plt.show()

In [None]:
print(f'Train on {DEVICE}')
batch_size = 20
print(f'Batch size: {batch_size}')

# Getting train, test, val datasets
train_dataset, test_dataset, val_dataset = get_datasets('dataset',
                                                        transform=transforms.Compose([transforms.ToTensor()]),
                                                        with_val=True,
                                                        seed=42)
train_dataloader = data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = data.DataLoader(test_dataset, shuffle=False)
val_dataloader = data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Getting model
model = get_fine_tuned_model()
print(model, '\n')
print(f'All layers: {len(model.state_dict())}')
print('Activated layers:')
for i, layer in enumerate(model.parameters(), start=1):
    if layer.requires_grad:
        print(i, layer.requires_grad)

# Train model
model.train()
model.to(DEVICE)
history = train(model=model,
                train_dataloader=train_dataloader,
                val_dataloader=val_dataloader,
                epochs=51,
                checkpoint_path=None)
np.save('history.npy', history)     # Save loss and accuracy

# Plotting history
# history = np.load('history.npy')
history_plot(history)

# Load weights of best fine-tuned model
# with open('models/best_model.pth', 'rb') as f:
#     dictw = torch.load(f)
#     model.load_state_dict(dictw)

# Calculate metrics
print('\nCalculate metrics:')
model.to(DEVICE)
roc_auc, acc, f1_score = test(model, test_dataloader)
print(f'Roc-auc: {roc_auc}')
print(f'Accuracy: {acc}')
print(f'F1-score: {f1_score}')

## 5. Cross-validation

In [13]:
def ft_cross_val(general_dataset: data.Dataset, epochs: int, batch_size: int):
    """ Cross-validation of fine-tuned model

    Args:
        general_dataset: Dataset
        epochs: Amount of epochs
        batch_size: Size of batch

    Returns:
        List of model metrics on each fold: [roc-auc, acc, f1]
    """
    # Divide into folds
    kfold = KFold(n_splits=5, shuffle=True)
    results = []

    # Train and validate ft_model on folds
    for fold, (train_datasplit, test_datasplit) in enumerate(kfold.split(general_dataset)):
        print(f'FOLD #{fold}')
        train_dataloader = data.DataLoader(general_dataset, batch_size, shuffle=False, sampler=train_datasplit)
        test_dataloader = data.DataLoader(general_dataset, 1, shuffle=False, sampler=test_datasplit)

        model = get_fine_tuned_model()
        model.to(DEVICE)
        train(model, train_dataloader, test_dataloader, epochs)
        # with open('models/best_model.pth', 'rb') as f:
        #     dictw = torch.load(f)
        #     model.load_state_dict(dictw)
        roc_auc, acc, f1 = test(model, test_dataloader, False)
        print(f'ROC AUC: {roc_auc}, Acc: {acc}, F1: {f1}')
        results.append([roc_auc, acc, f1])

    print('All scores: ', results)
    average = np.average(results, axis=0)
    print(f'K-fold CV average result.\nROC-AUC: {average[0]}, Acc: {average[1]}, F1: {average[2]}')
    return results


def svm_cross_val(X_src: str, y_src: str):
    """ Cross-validation of SVM model

    Args:
        X_src: Path to embeddings
        y_src: Path to labels

    Returns:
        List of model metrics on each fold: [roc-auc, acc, f1]
    """
    X = np.load(X_src)
    y = np.load(y_src)
    print(f'Size of general dataset: {X.shape}')
    classifier = SVC(C=0.3)
    scoring = ['roc_auc', 'accuracy', 'f1']
    result = cross_validate(classifier, X, y, cv=5, scoring=scoring, return_train_score=False)
    return result


def print_cv_res(cv_res_path: str):
    """ Prints average result of cross-validation

    Args:
        cv_res_path: Path to list of model metrics
    """
    ft_cv_res = np.load(cv_res_path)
    print('All scores: ', ft_cv_res)
    average = np.average(ft_cv_res, axis=0)
    print(average.shape)
    print(f'K-fold CV average result.\nROC-AUC: {average[0]}, Acc: {average[1]}, F1: {average[2]}')

In [20]:
print('--- SVM CV ---')
result = svm_cross_val('embeds/embeds.npy', 'embeds/labels.npy')
print(result)
roc_auc, acc, f1 = result['test_roc_auc'], result['test_accuracy'], result['test_f1']
np.save('svm_cv_res.npy', [roc_auc, acc, f1])
print(f'K-fold CV average result.\nROC-AUC: {np.average(roc_auc)}, Acc: {np.average(acc)}, F1: {np.average(f1)}')

--- SVM CV ---
Size of general dataset: (926, 4096)
{'fit_time': array([1.64290166, 1.32257819, 1.35756707, 1.35156679, 1.37156034]), 'score_time': array([1.18165827, 1.10364413, 1.08065391, 1.11464405, 1.12863636]), 'test_roc_auc': array([0.80209661, 0.65967666, 0.69218165, 0.75386236, 0.80348783]), 'test_accuracy': array([0.70967742, 0.64864865, 0.65405405, 0.71351351, 0.72432432]), 'test_f1': array([0.67857143, 0.67005076, 0.65591398, 0.70056497, 0.68711656])}
K-fold CV average result.
ROC-AUC: 0.7422610209590401, Acc: 0.6900435919790758, F1: 0.6784435409311925


In [21]:
print('--- Fine-tuned CV ---')
print(f'Train on {DEVICE}')
transform = transforms.Compose([transforms.ToTensor()])
general_dataset = datasets.ImageFolder('dataset', transform=transform)
ft_cv_res = ft_cross_val(general_dataset, 30, 16)
np.save('ft_cv_res_30_epochs.npy', ft_cv_res)

--- Fine-tuned CV ---
Train on cuda:0
FOLD #0


Epoch:   3%|▎         | 1/30 [00:25<12:19, 25.48s/it]


Epoch 000 train_loss: 0.7414                     val_loss 0.6845 train_acc 0.4104 val_acc 0.1058


Epoch:   3%|▎         | 1/30 [00:28<13:44, 28.42s/it]


KeyboardInterrupt: 