In [None]:
import torch
from torchvision import models
from torch import nn
import torch.nn.functional as F
import numpy as np
from sklearn.metrics import (
    roc_curve, accuracy_score, recall_score, precision_score, f1_score,
    confusion_matrix, roc_auc_score
)
import matplotlib.pyplot as plt
import joblib
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from statsmodels.stats.proportion import proportion_confint
from sklearn.metrics import (
    roc_auc_score, roc_curve, precision_recall_curve,
    confusion_matrix, classification_report, average_precision_score
)

In [None]:
# Pick the compute device: the GPU when PyTorch can see one, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

In [None]:
# Restore the saved hyperparameter-search result from disk.
# NOTE(review): joblib.load unpickles the file, so only load files you trust.
# The object is presumably an Optuna study (it exposes `.best_params`) — confirm.
study = joblib.load("found hyperparameters")
best_params = study.best_params

# Number of output classes for the classification head.
num_classes = 2

# Tuned values reused below for the classifier head and the data loader.
dropout_rate, batch_size = best_params['dropout'], best_params['batch_size']

In [None]:
# Start from DenseNet-121 with the IMAGENET1K_V1 weights and freeze every
# parameter that exists at this point.
model = models.densenet121(weights=models.DenseNet121_Weights.IMAGENET1K_V1)
model.requires_grad_(False)

# Swap the stock classifier for dropout + a linear layer with `num_classes`
# outputs. These layers are created after the freeze above, so they keep the
# default requires_grad setting.
in_features = model.classifier.in_features
classifier_head = [
    nn.Dropout(p=dropout_rate),
    nn.Linear(in_features, num_classes),
]
model.classifier = nn.Sequential(*classifier_head)

model = model.to(device)

In [None]:
# Load the trained weights into the model built above.
# map_location remaps the stored tensors onto the device selected earlier, so a
# checkpoint saved on a GPU machine still loads when this notebook falls back to
# the CPU (without it, torch.load tries to restore tensors onto the device they
# were saved from and fails if that device is unavailable).
state_dict = torch.load("your own trained best model", map_location=device)
model.load_state_dict(state_dict)

# Switch to inference mode (disables dropout, uses running batch-norm stats).
model.eval()

In [None]:
# Root folder of the evaluation images; ImageFolder takes the class of each
# image from the sub-directory it sits in.
valid_dir = "your own validation or test dataset path"

# Per-channel statistics passed to Normalize.
norm_mean = [0.485, 0.456, 0.406]
norm_std = [0.229, 0.224, 0.225]

# Deterministic preprocessing: resize to 456x456, convert to tensor, normalize.
valid_transform = transforms.Compose([
    transforms.Resize((456, 456)),
    transforms.ToTensor(),
    transforms.Normalize(mean=norm_mean, std=norm_std),
])

valid_dataset = datasets.ImageFolder(root=valid_dir, transform=valid_transform)

# shuffle=False keeps the sample order fixed across runs.
valid_loader = DataLoader(
    valid_dataset,
    batch_size=batch_size,
    shuffle=False,
)

In [None]:
# Run the model over the whole evaluation loader and collect, per sample, the
# softmax probability of class index 1 together with the true label.
# Per-batch arrays are gathered and concatenated once at the end.
batch_probs, batch_labels = [], []

with torch.no_grad():  # no gradients needed for evaluation
    for inputs, labels in valid_loader:
        # Only the images have to live on the compute device. The labels are
        # never fed to the model, so they stay on the CPU instead of being
        # copied to the device and straight back.
        outputs = model(inputs.to(device))
        probs = F.softmax(outputs, dim=1)[:, 1]

        batch_probs.append(probs.cpu().numpy())
        batch_labels.append(labels.numpy())

# np.concatenate rejects an empty list, so fall back to an empty array when the
# loader yielded no batches.
all_probs = np.concatenate(batch_probs) if batch_probs else np.array([])
all_labels = np.concatenate(batch_labels) if batch_labels else np.array([])

In [None]:
# Choose the operating point that maximises Youden's J statistic (TPR - FPR)
# along the ROC curve.
# NOTE(review): the threshold is tuned on the same labels/probabilities it is
# later applied to — confirm this is intended, since threshold-dependent
# metrics computed on this data may then be optimistic.
fpr, tpr, thresholds = roc_curve(all_labels, all_probs)
youden_index = tpr - fpr

best_idx = int(np.argmax(youden_index))
best_threshold = thresholds[best_idx]

print(f"Best threshold by Youden's index: {best_threshold:.4f}")

In [None]:
# Binarise the probabilities: 1 when at or above the chosen threshold, else 0.
is_positive = all_probs >= best_threshold
pred_labels = is_positive.astype(int)

In [None]:
def bootstrap_ci(metric_func, labels, preds, n_bootstraps=1000, alpha=0.05, seed=42):
    """Percentile-bootstrap confidence interval for a metric.

    Samples are redrawn with replacement ``n_bootstraps`` times; the metric is
    evaluated on each resample and the ``alpha / 2`` and ``1 - alpha / 2``
    percentiles of the resulting scores are returned.

    Parameters
    ----------
    metric_func : callable
        Called as ``metric_func(labels_resampled, preds_resampled)`` and
        expected to return a scalar score.
    labels : array-like
        True labels, one per sample.
    preds : array-like
        Predictions or scores, aligned with ``labels``.
    n_bootstraps : int, default 1000
        Number of resamples to draw.
    alpha : float, default 0.05
        Significance level; 0.05 yields a 95% interval.
    seed : int, default 42
        Seed for the resampling generator. The default reproduces the
        previously hard-coded seed.

    Returns
    -------
    (lower, upper) : tuple of float
        Interval bounds. ``(nan, nan)`` when no resample contained at least two
        distinct label values, so no score could be computed.

    Raises
    ------
    ValueError
        If ``labels`` is empty or ``labels`` and ``preds`` differ in length.
    """
    # Accept plain Python sequences as well as arrays: the fancy indexing below
    # only works on ndarrays.
    labels = np.asarray(labels)
    preds = np.asarray(preds)

    n_samples = len(labels)
    if n_samples == 0:
        raise ValueError("bootstrap_ci requires at least one sample")
    if len(preds) != n_samples:
        raise ValueError(
            f"labels and preds must have the same length "
            f"(got {n_samples} and {len(preds)})"
        )

    rng = np.random.RandomState(seed)
    scores = []
    for _ in range(n_bootstraps):
        indices = rng.randint(0, n_samples, n_samples)
        resampled_labels = labels[indices]
        # Skip resamples with a single label value: metrics such as ROC AUC
        # are undefined for them.
        if len(np.unique(resampled_labels)) < 2:
            continue
        scores.append(metric_func(resampled_labels, preds[indices]))

    # Every resample was skipped: there is nothing to take percentiles of.
    if not scores:
        return np.nan, np.nan

    scores = np.array(scores)
    lower = np.percentile(scores, 100 * alpha / 2)
    upper = np.percentile(scores, 100 * (1 - alpha / 2))
    return lower, upper

In [None]:
# Point estimates of the threshold-free metrics on the full evaluation set.
auc = roc_auc_score(all_labels, all_probs)
ap_score = average_precision_score(all_labels, all_probs)

# Matching (lower, upper) bootstrap intervals, using bootstrap_ci's defaults.
auc_ci = bootstrap_ci(roc_auc_score, all_labels, all_probs)
ap_ci = bootstrap_ci(average_precision_score, all_labels, all_probs)

In [None]:
# Confusion matrix comparing the true labels with the thresholded predictions.
cm = confusion_matrix(all_labels, pred_labels)