In [None]:
import sys
sys.path.append('/tf/data')

import numpy as np
import torch
import torch.nn as nn
import torchvision.transforms as transforms

from general_func import load_dataset
from Classification.class_functions import split_ds, concat_data
from Classification.conv_net_model import convnext_large

from sklearn.metrics import precision_score, recall_score, f1_score

In [None]:
# Notebook-wide configuration. Built with dict(...) so every key is a plain
# keyword; insertion order matches the order the settings are listed in.
params = dict(
    # --- Checkpoint, data and output locations ---
    best_model_path='/tf/data/Classification/ConvNeXt/Grid_Search3/Search_2/29/Epoch_026.zip',  # top model from the second grid search
    images_root_path='/tf/data/augmented_64_3ch/',
    run='Model_1',
    model_save_path='/tf/data/Classification/ConvNeXt/',
    model_save_freq_epochs=1,

    # --- Optimisation schedule ---
    num_epochs=150,
    learning_rate=0.004,
    weight_decay=0.005,
    warmup_epochs=0,
    early_stop=25,

    # --- Data loading ---
    batch_size=64,
    loader_workers=2,

    # --- Regularisation / loss ---
    drop_rate=0.5,
    apply_class_weights=True,

    # --- RandAugment ---
    num_ops=8,
    magnitude=10,

    # --- GridMask ---
    offset=False,  # False: masked squares are set to 0, True: masked squares are filled with noise
    ratio=0.5,     # fraction of the image to keep
    mode=1,        # 0 = keep the squares, 1 = cut the squares out
    prob=0.7,      # probability of applying the transformation
)

# Prefer the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

In [None]:
# Both transform pipelines are empty Compose objects: no torchvision transform
# is registered for either the train or the validation path in this notebook.
train_transform = transforms.Compose([])
val_transform = transforms.Compose([])

# Rebuild the architecture with the same hyper-parameters as the saved run,
# then restore the trained weights into it.
convnext_net = convnext_large(pretrained=False, in_22k=False, transform_train=train_transform, transform_val=val_transform, num_classes=2, drop_path_rate = params['drop_rate'])
# map_location=device remaps the stored tensors onto the device selected above,
# so a checkpoint written from a GPU run can still be restored on a CPU-only
# machine (without it torch.load tries to restore onto the original device).
# NOTE(review): torch.load unpickles the file; only load checkpoints you trust.
model_weights = torch.load(params['best_model_path'], map_location=device)
convnext_net.load_state_dict(model_weights)

# Move the model to the evaluation device and switch it to inference mode.
convnext_net.to(device)
convnext_net.eval()

In [None]:
# Test scans are stored in one directory per class: ".../1" is loaded as the
# positive set and ".../0" as the negative set.
ds_pos = load_dataset(custom_path='/tf/data/cropped/test data/1')
ds_neg = load_dataset(custom_path='/tf/data/cropped/test data/0')

# split_ds is called with train_split=1 / val_split=0, so only its first return
# value is used. The "train_" prefix is historical: these variables hold the
# test data that feeds test_loader.
# NOTE(review): presumably this puts every scan in the first split - confirm in split_ds.
train_scans_pos, _, _ = split_ds(ds_pos, train_split = 1, val_split = 0, seed = None)
train_scans_neg, _, _ = split_ds(ds_neg, train_split = 1, val_split = 0, seed = None)
test_loader = concat_data(train_scans_pos, train_scans_neg, batch_size=params['batch_size'], workers=params['loader_workers'])

if params['apply_class_weights']:
    # Inverse-frequency class weights: weight[c] = total / count[c], ordered
    # [class 0, class 1] to match the label indices.
    # NOTE(review): the counts come from the test set loaded above, not from
    # the training data - confirm this is the intended weighting for the loss.
    num_class_1_samples = len(ds_pos)
    num_class_0_samples = len(ds_neg)
    if num_class_0_samples == 0 or num_class_1_samples == 0:
        # Fail with an actionable message instead of a bare ZeroDivisionError.
        raise ValueError(
            "Cannot compute class weights: "
            f"{num_class_0_samples} negative and {num_class_1_samples} positive samples were loaded."
        )
    total_samples = num_class_1_samples + num_class_0_samples
    # Create the tensor directly on the target device (no intermediate CPU copy).
    class_weights = torch.tensor(
        [total_samples / num_class_0_samples, total_samples / num_class_1_samples],
        dtype=torch.float,
        device=device,
    )
else:
    class_weights = None
criterion = nn.CrossEntropyLoss(weight=class_weights)

In [5]:
def model_test_metrics1(model, test_loader, criterion, cutoff=0):
    """Evaluate a two-class model, scoring only the predictions it is "sure" about.

    A sample counts as "sure" when the absolute difference between its two
    output logits (columns 0 and 1 of the model output) is at least ``cutoff``.
    Accuracy, precision, recall and F1 are computed on the sure samples only;
    the loss is computed on every batch, sure or not.

    Args:
        model: torch module mapping a batch of images to per-class logits.
            Only output columns 0 and 1 are used for the margin.
        test_loader: iterable yielding ``(images, labels)`` batches, with
            labels given as class indices.
        criterion: loss callable invoked as ``criterion(outputs, labels)``.
        cutoff: minimum logit margin for a sample to be kept. The default 0
            keeps every sample, because the margin is never negative.

    Returns:
        Tuple ``(mean_batch_loss, accuracy_percent, precision, recall, f1,
        num_sure, num_unsure)``. ``mean_batch_loss`` is the mean of the
        per-batch criterion values (0.0 for an empty loader). When no sample
        passes the cutoff, accuracy, precision, recall and F1 are all 0.0.
    """
    # Run on the device the model already lives on, so inputs and weights can
    # never end up on different devices. Fall back to the CUDA-if-available
    # rule for models that expose no parameters.
    try:
        device = next(model.parameters()).device
    except (StopIteration, AttributeError):
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model.eval()
    loss_sum = 0.0
    num_batches = 0
    num_samples = 0
    sure_label_chunks = []
    sure_pred_chunks = []

    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)

            loss_sum += criterion(outputs, labels).item()
            num_batches += 1
            num_samples += labels.size(0)

            # Confidence proxy: absolute margin between the two class logits.
            margins = torch.abs(outputs[:, 0] - outputs[:, 1])
            sure_mask = margins >= cutoff

            # Select the sure samples with one boolean mask per batch instead
            # of one device-to-host transfer per sample.
            predicted = torch.argmax(outputs, dim=1)
            sure_label_chunks.append(labels[sure_mask].cpu())
            sure_pred_chunks.append(predicted[sure_mask].cpu())

    if sure_label_chunks:
        sure_labels = torch.cat(sure_label_chunks).numpy()
        sure_predicted = torch.cat(sure_pred_chunks).numpy()
    else:
        sure_labels = np.array([], dtype=np.int64)
        sure_predicted = np.array([], dtype=np.int64)

    num_sure = int(sure_labels.shape[0])
    num_unsure = num_samples - num_sure

    # An empty loader must not crash the averaging.
    mean_loss = loss_sum / num_batches if num_batches > 0 else 0.0

    if num_sure > 0:
        accuracy = 100 * np.sum(sure_predicted == sure_labels) / num_sure
        precision = precision_score(sure_labels, sure_predicted)
        recall = recall_score(sure_labels, sure_predicted)
        f1 = f1_score(sure_labels, sure_predicted)
    else:
        # Nothing passed the cutoff: report zeros rather than handing empty
        # arrays to scikit-learn.
        accuracy = precision = recall = f1 = 0.0

    # Print statistics
    print(f"Test Loss: {mean_loss:.4f}, "
          f"Test Accuracy: {accuracy:.2f}%, "
          f"Precision: {precision:.4f}, "
          f"Recall: {recall:.4f}, "
          f"F1-score: {f1:.4f}, "
          f"Sure cases: {num_sure} , "
          f"Unsure cases: {num_unsure}")

    return mean_loss, accuracy, precision, recall, f1, num_sure, num_unsure


In [6]:
# Baseline evaluation: cutoff is left at its default of 0, and the margin is an
# absolute value, so every test sample passes the "sure" filter.
results = model_test_metrics1(convnext_net, test_loader, criterion)

Test Loss: 0.5266, Test Accuracy: 81.68%, Precision: 0.7727, Recall: 0.8763, F1-score: 0.8213, Sure cases: 404 , Unsure cases: 0


In [14]:
# Confidence-filtered evaluation: only samples whose logit margin is >= 3.29
# are scored. In the recorded output this cutoff keeps 202 of the 404 samples.
# NOTE(review): how 3.29 was chosen is not shown in this notebook - document it
# or move it into `params`. This call also overwrites the baseline `results`.
results = model_test_metrics1(convnext_net, test_loader, criterion, cutoff = 3.29)

Test Loss: 0.5135, Test Accuracy: 92.57%, Precision: 0.9342, Recall: 0.8765, F1-score: 0.9045, Sure cases: 202 , Unsure cases: 202
