## AMi-Br Test Set

In [None]:
import os
import gc
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from tqdm import tqdm
import numpy as np
import pickle
import logging
from sklearn.metrics import (
    balanced_accuracy_score,
    roc_auc_score,
    precision_recall_curve,
    average_precision_score
)
import matplotlib.pyplot as plt
from huggingface_hub import login
import timm
from torchvision import transforms
from scipy.interpolate import interp1d

# Logging
log_file = "hoptimus0_linear_probe_inference.log"
# force=True drops handlers that an earlier cell may have left on the root
# logger. Without it basicConfig() is a silent no-op on an already configured
# root logger, and this cell's log file would never be written.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[logging.FileHandler(log_file), logging.StreamHandler()],
    force=True,
)
logger = logging.getLogger(__name__)

# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hugging Face login
# The HF_TOKEN environment variable takes precedence so the secret does not
# have to be pasted into the notebook; the placeholder remains as fallback.
login(token=os.environ.get("HF_TOKEN", "Your HuggingFace Token Here"))  # replace with your token

# Load H-Optimus-0 model (frozen backbone, as in training)
# NOTE(review): init_values / dynamic_img_size are forwarded to timm as-is;
# presumably the values recommended for this checkpoint - confirm.
hoptimus_model = timm.create_model(
    "hf-hub:bioptimus/H-optimus-0",
    pretrained=True,
    init_values=1e-5,
    dynamic_img_size=False
).to(device).eval()

# Transform for H-Optimus-0: PIL image -> tensor, then per-channel
# normalisation with the fixed mean/std below.
hoptimus_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(
        mean=(0.707223, 0.578729, 0.703617),
        std=(0.211883, 0.230117, 0.177517)
    )
])

# Embedding extractor (same as in training)
def extract_embedding(img_path):
    """Return the H-Optimus-0 output for a single image file.

    The image is converted to RGB, resized to 224x224, passed through
    ``hoptimus_transform`` and then through ``hoptimus_model``.

    Args:
        img_path: Path of the image to embed.

    Returns:
        The model output with the batch axis squeezed out, moved to the CPU.
    """
    # Close the underlying file deterministically; a bare Image.open() can
    # keep the handle open (e.g. for multi-frame files).
    with Image.open(img_path) as raw:
        image = raw.convert("RGB").resize((224, 224))
    tensor = hoptimus_transform(image).unsqueeze(0).to(device)
    # Mixed precision is only meaningful on CUDA. Disabling it elsewhere keeps
    # the same numerics as before but avoids the "CUDA is not available"
    # autocast warning on CPU-only machines.
    use_amp = device.type == "cuda"
    with torch.inference_mode(), torch.autocast(
        device_type="cuda", dtype=torch.float16, enabled=use_amp
    ):
        features = hoptimus_model(tensor)
    return features.squeeze(0).cpu()

# Dataset for inference
class InferenceDataset(Dataset):
    """Dataset yielding ``(embedding, label)`` pairs for a list of image files.

    Embeddings are produced by ``extract_embedding`` and memoised per index,
    so iterating the dataset again (it is traversed once per fold) does not
    push the same image through the backbone a second time.
    """

    def __init__(self, image_paths, labels):
        """Store the image paths and their labels (parallel sequences)."""
        self.image_paths = image_paths
        self.labels = labels
        # idx -> embedding already computed for that image.
        self._embedding_cache = {}

    def __len__(self):
        """Return the number of images."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return the embedding of image ``idx`` and its float32 label.

        The label tensor gets one leading axis via ``unsqueeze(0)``.
        """
        embedding = self._embedding_cache.get(idx)
        if embedding is None:
            embedding = extract_embedding(self.image_paths[idx])
            self._embedding_cache[idx] = embedding

        label = torch.tensor(self.labels[idx], dtype=torch.float32).unsqueeze(0)
        return embedding, label

# Classifier head: 1-layer linear probe (matches training script)
class HoptimusBinaryClassifier(nn.Module):
    """Linear probe: one ``nn.Linear`` layer mapping an embedding to one logit.

    Args:
        embed_dim: Size of the input embedding. Defaults to 1536, the value
            that used to be hard-coded.
    """

    def __init__(self, embed_dim=1536):
        super().__init__()
        # Single linear layer from the embedding to 1 logit
        self.classifier = nn.Linear(embed_dim, 1)

    def forward(self, x):
        """Return the linear layer's output for ``x`` (no activation applied)."""
        return self.classifier(x)

# Load test data
test_root = "/data/MELBA-AmiBr/Datasets_Stratified/AMi-Br/Test"
class_map = {"Atypical": 0, "Normal": 1}
image_paths, labels = [], []

# One sub-directory per class; every image in it receives that class's label.
for label_name, label_val in class_map.items():
    class_dir = os.path.join(test_root, label_name)
    # os.listdir() returns entries in arbitrary order. Sorting keeps the
    # sample order, and therefore the saved per-sample predictions,
    # reproducible across runs and machines.
    for fname in sorted(os.listdir(class_dir)):
        if fname.lower().endswith((".jpg", ".jpeg", ".png", ".tif")):
            image_paths.append(os.path.join(class_dir, fname))
            labels.append(label_val)

true_labels = np.array(labels)
test_dataset = InferenceDataset(image_paths, labels)
# Batches of one, in listing order, loaded in the main process.
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=0)

# Output setup
os.makedirs("pr_curves", exist_ok=True)
fold_probs_dict = {}  # "fold_<k>" -> probabilities / predictions / labels
fold_bal_accs, fold_aurocs, fold_pr_aucs = [], [], []
all_precisions, all_recalls = [], []

# Inference loop over 5 folds
for fold in range(1, 6):
    logger.info(f"--- Fold {fold} Inference ---")

    # The checkpoint is a whole pickled module, hence weights_only=False.
    # Unpickling can execute arbitrary code: only load trusted files.
    checkpoint_path = f"hoptimus0_linear_probe_fold_{fold}_best.pth"
    model = torch.load(checkpoint_path, map_location=device, weights_only=False)
    model.to(device).eval()

    # One sigmoid probability per batch; .item() needs a single-element batch.
    fold_probs = []
    with torch.no_grad():
        for embeddings, _ in tqdm(test_loader, desc=f"Fold {fold}"):
            logits = model(embeddings.to(device))
            fold_probs.append(torch.sigmoid(logits).squeeze().cpu().item())

    fold_probs = np.array(fold_probs)
    # Hard decision at a fixed 0.5 cut-off.
    fold_preds = (fold_probs > 0.5).astype(int)

    # Metrics for this fold.
    precision, recall, _ = precision_recall_curve(true_labels, fold_probs)
    bal_acc = balanced_accuracy_score(true_labels, fold_preds)
    auroc = roc_auc_score(true_labels, fold_probs)
    pr_auc = average_precision_score(true_labels, fold_probs)

    logger.info(
        f"Fold {fold} - Balanced Accuracy: {bal_acc:.4f}, "
        f"AUROC: {auroc:.4f}, PR AUC: {pr_auc:.4f}"
    )

    # Record this fold's results in the cross-fold accumulators.
    for history, value in (
        (fold_bal_accs, bal_acc),
        (fold_aurocs, auroc),
        (fold_pr_aucs, pr_auc),
        (all_precisions, precision),
        (all_recalls, recall),
    ):
        history.append(value)

    fold_probs_dict[f"fold_{fold}"] = {
        "probs": fold_probs.tolist(),
        "preds": fold_preds.tolist(),
        "true_labels": true_labels.tolist()
    }

    # PR curve per fold
    fig, ax = plt.subplots()
    ax.plot(recall, precision, label=f"Fold {fold} (AP = {pr_auc:.4f})")
    ax.set_xlabel("Recall")
    ax.set_ylabel("Precision")
    ax.set_title(f"Precision-Recall Curve - Fold {fold}")
    ax.grid(True)
    ax.legend()
    fig.savefig(f"pr_curves/hoptimus0_amibr_pr_curve_fold_{fold}.png")
    plt.close(fig)

    # Drop this fold's model before the next one is loaded.
    del model
    gc.collect()
    if device.type == "cuda":
        torch.cuda.empty_cache()

# Averaged PR curve across folds
# Each fold's curve is resampled onto a shared recall grid so the precision
# values can be averaged point by point.
rec_uniform = np.linspace(0, 1, 1000)
interp_prec_list = []

for prec, rec in zip(all_precisions, all_recalls):
    # The stored curves are reversed before being handed to interp1d.
    rec_flipped, prec_flipped = rec[::-1], prec[::-1]
    interp = interp1d(rec_flipped, prec_flipped, bounds_error=False, fill_value=0.0)
    interp_prec_list.append(interp(rec_uniform))

mean_precision = np.mean(interp_prec_list, axis=0)

fig, ax = plt.subplots()
ax.plot(
    rec_uniform,
    mean_precision,
    label=f"Mean PR (Avg AUC = {np.mean(fold_pr_aucs):.4f})",
)
ax.set_xlabel("Recall")
ax.set_ylabel("Precision")
ax.set_title("Average Precision-Recall Curve - H-Optimus-0 Linear Probing")
ax.grid(True)
ax.legend()
fig.savefig("pr_curves/hoptimus0_amibr_pr_curve_average.png")
plt.close(fig)

# Summary: mean ± standard deviation over the folds for every metric.
logger.info("\n--- Final Summary (H-Optimus-0 Linear Probing) ---")
for metric_name, fold_values in (
    ("Balanced Accuracy", fold_bal_accs),
    ("AUROC", fold_aurocs),
    ("PR AUC", fold_pr_aucs),
):
    logger.info(
        f"{metric_name}: {np.mean(fold_values):.4f} ± {np.std(fold_values):.4f}"
    )

# Save predictions
predictions_path = "hoptimus0_amibr_test_predictions.pkl"
with open(predictions_path, "wb") as f:
    pickle.dump(fold_probs_dict, f)

logger.info(f"Saved prediction results to {predictions_path}")


2025-12-11 18:00:26,050 - INFO - Loading pretrained weights from Hugging Face hub (bioptimus/H-optimus-0)
2025-12-11 18:00:30,435 - INFO - --- Fold 1 Inference ---
Fold 1: 100%|██████████| 826/826 [00:32<00:00, 25.12it/s]
2025-12-11 18:01:03,326 - INFO - Fold 1 - Balanced Accuracy: 0.6152, AUROC: 0.6700, PR AUC: 0.8627
2025-12-11 18:01:03,579 - INFO - --- Fold 2 Inference ---
Fold 2: 100%|██████████| 826/826 [00:32<00:00, 25.41it/s]
2025-12-11 18:01:36,098 - INFO - Fold 2 - Balanced Accuracy: 0.6130, AUROC: 0.6595, PR AUC: 0.8518
2025-12-11 18:01:36,360 - INFO - --- Fold 3 Inference ---
Fold 3: 100%|██████████| 826/826 [00:32<00:00, 25.58it/s]
2025-12-11 18:02:08,662 - INFO - Fold 3 - Balanced Accuracy: 0.6429, AUROC: 0.6804, PR AUC: 0.8703
2025-12-11 18:02:08,903 - INFO - --- Fold 4 Inference ---
Fold 4: 100%|██████████| 826/826 [00:32<00:00, 25.37it/s]
2025-12-11 18:02:41,475 - INFO - Fold 4 - Balanced Accuracy: 0.6373, AUROC: 0.6775, PR AUC: 0.8638
2025-12-11 18:02:41,737 - INFO - -

In [None]:
import os
import gc
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from tqdm import tqdm
import numpy as np
import pickle
import logging
from huggingface_hub import login
import timm
from torchvision import transforms

# Logging
log_file = "hoptimus0_linear_probe_sens_spec.log"
# force=True drops handlers that an earlier cell may have left on the root
# logger. Without it basicConfig() is a silent no-op on an already configured
# root logger, and this cell's log file would never be written.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[logging.FileHandler(log_file), logging.StreamHandler()],
    force=True,
)
logger = logging.getLogger(__name__)

# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hugging Face login
# The HF_TOKEN environment variable takes precedence so the secret does not
# have to be pasted into the notebook; the placeholder remains as fallback.
login(token=os.environ.get("HF_TOKEN", "Your HuggingFace Token Here"))  # replace with your token

# Load H-Optimus-0 model (frozen backbone, as in training)
# NOTE(review): init_values / dynamic_img_size are forwarded to timm as-is;
# presumably the values recommended for this checkpoint - confirm.
hoptimus_model = timm.create_model(
    "hf-hub:bioptimus/H-optimus-0",
    pretrained=True,
    init_values=1e-5,
    dynamic_img_size=False
).to(device).eval()

# Transform for H-Optimus-0: PIL image -> tensor, then per-channel
# normalisation with the fixed mean/std below.
hoptimus_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(
        mean=(0.707223, 0.578729, 0.703617),
        std=(0.211883, 0.230117, 0.177517)
    )
])

# Embedding extractor (same as in training)
def extract_embedding(img_path):
    """Return the H-Optimus-0 output for a single image file.

    The image is converted to RGB, resized to 224x224, passed through
    ``hoptimus_transform`` and then through ``hoptimus_model``.

    Args:
        img_path: Path of the image to embed.

    Returns:
        The model output with the batch axis squeezed out, moved to the CPU.
    """
    # Close the underlying file deterministically; a bare Image.open() can
    # keep the handle open (e.g. for multi-frame files).
    with Image.open(img_path) as raw:
        image = raw.convert("RGB").resize((224, 224))
    tensor = hoptimus_transform(image).unsqueeze(0).to(device)
    # Mixed precision is only meaningful on CUDA. Disabling it elsewhere keeps
    # the same numerics as before but avoids the "CUDA is not available"
    # autocast warning on CPU-only machines.
    use_amp = device.type == "cuda"
    with torch.inference_mode(), torch.autocast(
        device_type="cuda", dtype=torch.float16, enabled=use_amp
    ):
        features = hoptimus_model(tensor)
    return features.squeeze(0).cpu()

# Dataset for inference
class InferenceDataset(Dataset):
    """Dataset yielding ``(embedding, label)`` pairs for a list of image files.

    Embeddings are produced by ``extract_embedding`` and memoised per index,
    so iterating the dataset again (it is traversed once per fold) does not
    push the same image through the backbone a second time.
    """

    def __init__(self, image_paths, labels):
        """Store the image paths and their labels (parallel sequences)."""
        self.image_paths = image_paths
        self.labels = labels
        # idx -> embedding already computed for that image.
        self._embedding_cache = {}

    def __len__(self):
        """Return the number of images."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return the embedding of image ``idx`` and its float32 label.

        The label tensor gets one leading axis via ``unsqueeze(0)``.
        """
        embedding = self._embedding_cache.get(idx)
        if embedding is None:
            embedding = extract_embedding(self.image_paths[idx])
            self._embedding_cache[idx] = embedding
        label = torch.tensor(self.labels[idx], dtype=torch.float32).unsqueeze(0)
        return embedding, label

# Classifier head: 1-layer linear probe (matches training script)
class HoptimusBinaryClassifier(nn.Module):
    """Linear probe: one ``nn.Linear`` layer mapping an embedding to one logit.

    Args:
        embed_dim: Size of the input embedding. Defaults to 1536, the value
            that used to be hard-coded.
    """

    def __init__(self, embed_dim=1536):
        super().__init__()
        self.classifier = nn.Linear(embed_dim, 1)

    def forward(self, x):
        """Return the linear layer's output for ``x`` (no activation applied)."""
        return self.classifier(x)

# Load test data
test_root = "/data/MELBA-AmiBr/Datasets_Stratified/AMi-Br/Test"
class_map = {"Atypical": 0, "Normal": 1}
image_paths, labels = [], []

# One sub-directory per class; every image in it receives that class's label.
for label_name, label_val in class_map.items():
    class_dir = os.path.join(test_root, label_name)
    # os.listdir() returns entries in arbitrary order. Sorting keeps the
    # sample order, and therefore the saved per-sample predictions,
    # reproducible across runs and machines.
    for fname in sorted(os.listdir(class_dir)):
        if fname.lower().endswith((".jpg", ".jpeg", ".png", ".tif")):
            image_paths.append(os.path.join(class_dir, fname))
            labels.append(label_val)

true_labels = np.array(labels)
test_dataset = InferenceDataset(image_paths, labels)
# Batches of one, in listing order, loaded in the main process.
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=0)

# Output setup
fold_probs_dict = {}  # "fold_<k>" -> probabilities / predictions / metrics

# Per-class sensitivity/specificity across folds
fold_sens_atypical = []
fold_spec_atypical = []
fold_sens_normal = []
fold_spec_normal = []

def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is not positive."""
    return numerator / denominator if denominator > 0 else 0.0


# Inference loop over 5 folds
for fold in range(1, 6):
    logger.info(f"--- Fold {fold} Inference ---")

    # The checkpoint is a whole pickled module, hence weights_only=False.
    # Unpickling can execute arbitrary code: only load trusted files.
    checkpoint_path = f"hoptimus0_linear_probe_fold_{fold}_best.pth"
    model = torch.load(checkpoint_path, map_location=device, weights_only=False)
    model.to(device).eval()

    # One sigmoid probability per batch; .item() needs a single-element batch.
    fold_probs = []
    with torch.no_grad():
        for embeddings, _ in tqdm(test_loader, desc=f"Fold {fold}"):
            logits = model(embeddings.to(device))
            fold_probs.append(torch.sigmoid(logits).squeeze().cpu().item())

    fold_probs = np.array(fold_probs)
    fold_preds = (fold_probs > 0.5).astype(int)

    # Boolean masks for the four prediction / ground-truth combinations.
    pred_is_atyp, pred_is_norm = fold_preds == 0, fold_preds == 1
    true_is_atyp, true_is_norm = true_labels == 0, true_labels == 1

    # Atypical (class 0) as positive
    tp_atyp = np.sum(pred_is_atyp & true_is_atyp)
    fn_atyp = np.sum(pred_is_norm & true_is_atyp)
    tn_atyp = np.sum(pred_is_norm & true_is_norm)
    fp_atyp = np.sum(pred_is_atyp & true_is_norm)

    sens_atypical = _safe_ratio(tp_atyp, tp_atyp + fn_atyp)
    spec_atypical = _safe_ratio(tn_atyp, tn_atyp + fp_atyp)

    # Normal (class 1) as positive: the same four counts with roles swapped.
    tp_norm, fn_norm, tn_norm, fp_norm = tn_atyp, fp_atyp, tp_atyp, fn_atyp

    sens_normal = _safe_ratio(tp_norm, tp_norm + fn_norm)
    spec_normal = _safe_ratio(tn_norm, tn_norm + fp_norm)

    logger.info(
        f"Fold {fold} - "
        f"Atypical (0): Sensitivity={sens_atypical:.4f}, Specificity={spec_atypical:.4f} | "
        f"Normal (1): Sensitivity={sens_normal:.4f}, Specificity={spec_normal:.4f}"
    )

    # Record this fold's results in the cross-fold accumulators.
    for history, value in (
        (fold_sens_atypical, sens_atypical),
        (fold_spec_atypical, spec_atypical),
        (fold_sens_normal, sens_normal),
        (fold_spec_normal, spec_normal),
    ):
        history.append(value)

    fold_probs_dict[f"fold_{fold}"] = {
        "probs": fold_probs.tolist(),
        "preds": fold_preds.tolist(),
        "true_labels": true_labels.tolist(),
        "sens_atypical": sens_atypical,
        "spec_atypical": spec_atypical,
        "sens_normal": sens_normal,
        "spec_normal": spec_normal,
    }

    # Drop this fold's model before the next one is loaded.
    del model
    gc.collect()
    if device.type == "cuda":
        torch.cuda.empty_cache()

def _mean_std(values):
    """Format the mean and standard deviation of ``values`` as 'mean ± std'."""
    return f"{np.mean(values):.4f} ± {np.std(values):.4f}"


# Summary: mean ± standard deviation over the folds, per class.
logger.info("\n--- Final Summary (H-Optimus-0 Linear Probing, threshold=0.5) ---")
logger.info(
    "Atypical (class 0) - Sensitivity: "
    f"{_mean_std(fold_sens_atypical)}, "
    "Specificity: "
    f"{_mean_std(fold_spec_atypical)}"
)
logger.info(
    "Normal (class 1)   - Sensitivity: "
    f"{_mean_std(fold_sens_normal)}, "
    "Specificity: "
    f"{_mean_std(fold_spec_normal)}"
)

# Save predictions + metrics
predictions_path = "hoptimus0_amibr_test_predictions_sens_spec.pkl"
with open(predictions_path, "wb") as f:
    pickle.dump(fold_probs_dict, f)

logger.info(f"Saved prediction results to {predictions_path}")


2025-12-12 14:56:21,528 - INFO - Loading pretrained weights from Hugging Face hub (bioptimus/H-optimus-0)
2025-12-12 14:56:27,432 - INFO - --- Fold 1 Inference ---
Fold 1: 100%|██████████| 826/826 [00:46<00:00, 17.86it/s]
2025-12-12 14:57:13,682 - INFO - Fold 1 - Atypical (0): Sensitivity=0.5326, Specificity=0.6978 | Normal (1): Sensitivity=0.6978, Specificity=0.5326
2025-12-12 14:57:13,862 - INFO - --- Fold 2 Inference ---
Fold 2: 100%|██████████| 826/826 [00:42<00:00, 19.65it/s]
2025-12-12 14:57:55,900 - INFO - Fold 2 - Atypical (0): Sensitivity=0.4783, Specificity=0.7477 | Normal (1): Sensitivity=0.7477, Specificity=0.4783
2025-12-12 14:57:56,069 - INFO - --- Fold 3 Inference ---
Fold 3: 100%|██████████| 826/826 [00:32<00:00, 25.60it/s]
2025-12-12 14:58:28,334 - INFO - Fold 3 - Atypical (0): Sensitivity=0.5380, Specificity=0.7477 | Normal (1): Sensitivity=0.7477, Specificity=0.5380
2025-12-12 14:58:28,503 - INFO - --- Fold 4 Inference ---
Fold 4: 100%|██████████| 826/826 [00:32<00:0

## AtNorM-Br

In [None]:
import os
import gc
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from tqdm import tqdm
import numpy as np
import pickle
import logging
from sklearn.metrics import (
    balanced_accuracy_score,
    roc_auc_score,
    precision_recall_curve,
    average_precision_score
)
import matplotlib.pyplot as plt
from huggingface_hub import login
import timm
from torchvision import transforms
from scipy.interpolate import interp1d

# Logging
log_file = "hoptimus0_linear_probe_inference.log"
# force=True drops handlers that an earlier cell may have left on the root
# logger. Without it basicConfig() is a silent no-op on an already configured
# root logger, and this cell's log file would never be written.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[logging.FileHandler(log_file), logging.StreamHandler()],
    force=True,
)
logger = logging.getLogger(__name__)

# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hugging Face login
# The HF_TOKEN environment variable takes precedence so the secret does not
# have to be pasted into the notebook; the placeholder remains as fallback.
login(token=os.environ.get("HF_TOKEN", "Your HuggingFace Token Here"))  # replace with your token

# Load H-Optimus-0 model
# NOTE(review): init_values / dynamic_img_size are forwarded to timm as-is;
# presumably the values recommended for this checkpoint - confirm.
hoptimus_model = timm.create_model(
    "hf-hub:bioptimus/H-optimus-0",
    pretrained=True,
    init_values=1e-5,
    dynamic_img_size=False
).to(device).eval()

# Transform for H-Optimus-0: PIL image -> tensor, then per-channel
# normalisation with the fixed mean/std below.
hoptimus_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(
        mean=(0.707223, 0.578729, 0.703617),
        std=(0.211883, 0.230117, 0.177517)
    )
])

# Embedding extractor
def extract_embedding(img_path):
    """Return the H-Optimus-0 output for a single image file.

    The image is converted to RGB, resized to 224x224, passed through
    ``hoptimus_transform`` and then through ``hoptimus_model``.

    Args:
        img_path: Path of the image to embed.

    Returns:
        The model output with the batch axis squeezed out, moved to the CPU.
    """
    # Close the underlying file deterministically; a bare Image.open() can
    # keep the handle open (e.g. for multi-frame files).
    with Image.open(img_path) as raw:
        image = raw.convert("RGB").resize((224, 224))
    tensor = hoptimus_transform(image).unsqueeze(0).to(device)
    # Mixed precision is only meaningful on CUDA. Disabling it elsewhere keeps
    # the same numerics as before but avoids the "CUDA is not available"
    # autocast warning on CPU-only machines.
    use_amp = device.type == "cuda"
    with torch.inference_mode(), torch.autocast(
        device_type="cuda", dtype=torch.float16, enabled=use_amp
    ):
        features = hoptimus_model(tensor)
    return features.squeeze(0).cpu()

# Dataset
class InferenceDataset(Dataset):
    """Dataset yielding ``(embedding, label)`` pairs for a list of image files.

    Embeddings are produced by ``extract_embedding`` and memoised per index,
    so iterating the dataset again (it is traversed once per fold) does not
    push the same image through the backbone a second time.
    """

    def __init__(self, image_paths, labels):
        """Store the image paths and their labels (parallel sequences)."""
        self.image_paths = image_paths
        self.labels = labels
        # idx -> embedding already computed for that image.
        self._embedding_cache = {}

    def __len__(self):
        """Return the number of images."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return the embedding of image ``idx`` and its float32 label.

        The label tensor gets one leading axis via ``unsqueeze(0)``.
        """
        embedding = self._embedding_cache.get(idx)
        if embedding is None:
            embedding = extract_embedding(self.image_paths[idx])
            self._embedding_cache[idx] = embedding
        label = torch.tensor(self.labels[idx], dtype=torch.float32).unsqueeze(0)
        return embedding, label

# Classifier head: 1-layer linear probe (matches training)
class HoptimusBinaryClassifier(nn.Module):
    """Linear probe: one ``nn.Linear`` layer mapping an embedding to one logit.

    Args:
        embed_dim: Size of the input embedding. Defaults to 1536, the value
            that used to be hard-coded.
    """

    def __init__(self, embed_dim=1536):
        super().__init__()
        self.classifier = nn.Linear(embed_dim, 1)

    def forward(self, x):
        """Return the linear layer's output for ``x`` (no activation applied)."""
        return self.classifier(x)

# Load test data
test_root = "/data/MELBA-AmiBr/Datasets_Stratified/AtNorM-Br"
class_map = {"Atypical": 0, "Normal": 1}
image_paths, labels = [], []

# One sub-directory per class; every image in it receives that class's label.
for label_name, label_val in class_map.items():
    class_dir = os.path.join(test_root, label_name)
    # os.listdir() returns entries in arbitrary order. Sorting keeps the
    # sample order, and therefore the saved per-sample predictions,
    # reproducible across runs and machines.
    for fname in sorted(os.listdir(class_dir)):
        if fname.lower().endswith(('.jpg', '.jpeg', '.png', '.tif')):
            image_paths.append(os.path.join(class_dir, fname))
            labels.append(label_val)

true_labels = np.array(labels)
test_dataset = InferenceDataset(image_paths, labels)
# Batches of one, in listing order, loaded in the main process.
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=0)

# Output setup
os.makedirs("pr_curves", exist_ok=True)
fold_probs_dict = {}  # "fold_<k>" -> probabilities / predictions / labels
fold_bal_accs, fold_aurocs, fold_pr_aucs = [], [], []
all_precisions, all_recalls = [], []

# Inference loop
for fold in range(1, 6):
    logger.info(f"--- Fold {fold} Inference ---")

    # The checkpoint is a whole pickled module, so weights_only must be False.
    # Relying on the default emits a warning on current PyTorch and fails once
    # the default is True; the other inference cells already pass it.
    # Unpickling can execute arbitrary code: only load trusted files.
    model = torch.load(
        f"hoptimus0_linear_probe_fold_{fold}_best.pth",
        map_location=device,
        weights_only=False
    )
    model.to(device).eval()

    # One sigmoid probability per batch; .item() needs a single-element batch.
    fold_probs = []
    with torch.no_grad():
        for embeddings, _ in tqdm(test_loader, desc=f"Fold {fold}"):
            embeddings = embeddings.to(device)
            logits = model(embeddings)
            prob = torch.sigmoid(logits).squeeze().cpu().item()
            fold_probs.append(prob)

    fold_probs = np.array(fold_probs)
    # Hard decision at a fixed 0.5 cut-off.
    fold_preds = (fold_probs > 0.5).astype(int)

    # Metrics for this fold.
    bal_acc = balanced_accuracy_score(true_labels, fold_preds)
    auroc = roc_auc_score(true_labels, fold_probs)
    precision, recall, _ = precision_recall_curve(true_labels, fold_probs)
    pr_auc = average_precision_score(true_labels, fold_probs)

    logger.info(
        f"Fold {fold} - Balanced Accuracy: {bal_acc:.4f}, "
        f"AUROC: {auroc:.4f}, PR AUC: {pr_auc:.4f}"
    )
    fold_bal_accs.append(bal_acc)
    fold_aurocs.append(auroc)
    fold_pr_aucs.append(pr_auc)
    all_precisions.append(precision)
    all_recalls.append(recall)

    fold_probs_dict[f"fold_{fold}"] = {
        "probs": fold_probs.tolist(),
        "preds": fold_preds.tolist(),
        "true_labels": true_labels.tolist()
    }

    # PR curve
    plt.figure()
    plt.plot(recall, precision, label=f"Fold {fold} (AP = {pr_auc:.4f})")
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.title(f"Precision-Recall Curve - Fold {fold}")
    plt.grid(True)
    plt.legend()
    plt.savefig(f"pr_curves/hoptimus0_atnorm-br_pr_curve_fold_{fold}.png")
    plt.close()

    # Drop this fold's model before the next one is loaded.
    del model
    gc.collect()
    if device.type == "cuda":
        torch.cuda.empty_cache()

# Averaged PR curve
# Each fold's curve is resampled onto a shared recall grid so the precision
# values can be averaged point by point.
rec_uniform = np.linspace(0, 1, 1000)
interp_prec_list = []

for prec, rec in zip(all_precisions, all_recalls):
    # The stored curves are reversed before being handed to interp1d.
    rec_flipped, prec_flipped = rec[::-1], prec[::-1]
    interp = interp1d(rec_flipped, prec_flipped, bounds_error=False, fill_value=0.0)
    interp_prec_list.append(interp(rec_uniform))

mean_precision = np.mean(interp_prec_list, axis=0)

fig, ax = plt.subplots()
ax.plot(
    rec_uniform,
    mean_precision,
    label=f"Mean PR (Avg AUC = {np.mean(fold_pr_aucs):.4f})",
)
ax.set_xlabel("Recall")
ax.set_ylabel("Precision")
ax.set_title("Average Precision-Recall Curve - H-Optimus-0 Linear Probing")
ax.grid(True)
ax.legend()
fig.savefig("pr_curves/hoptimus0_atnorm-br_pr_curve_average.png")
plt.close(fig)

# Summary: mean ± standard deviation over the folds for every metric.
logger.info("\n--- Final Summary (H-Optimus-0 Linear Probing) ---")
for metric_name, fold_values in (
    ("Balanced Accuracy", fold_bal_accs),
    ("AUROC", fold_aurocs),
    ("PR AUC", fold_pr_aucs),
):
    logger.info(
        f"{metric_name}: {np.mean(fold_values):.4f} ± {np.std(fold_values):.4f}"
    )

# Save predictions
predictions_path = "hoptimus0_atnorm-br_test_predictions.pkl"
with open(predictions_path, "wb") as f:
    pickle.dump(fold_probs_dict, f)

logger.info(f"Saved prediction results to {predictions_path}")


2025-12-11 18:10:13,673 - INFO - Loading pretrained weights from Hugging Face hub (bioptimus/H-optimus-0)
2025-12-11 18:10:17,860 - INFO - --- Fold 1 Inference ---
  model = torch.load(
Fold 1: 100%|██████████| 746/746 [00:29<00:00, 25.02it/s]
2025-12-11 18:10:47,679 - INFO - Fold 1 - Balanced Accuracy: 0.6115, AUROC: 0.6869, PR AUC: 0.9129
2025-12-11 18:10:47,953 - INFO - --- Fold 2 Inference ---
  model = torch.load(
Fold 2: 100%|██████████| 746/746 [00:29<00:00, 25.49it/s]
2025-12-11 18:11:17,232 - INFO - Fold 2 - Balanced Accuracy: 0.6558, AUROC: 0.7313, PR AUC: 0.9289
2025-12-11 18:11:17,488 - INFO - --- Fold 3 Inference ---
  model = torch.load(
Fold 3: 100%|██████████| 746/746 [00:29<00:00, 25.35it/s]
2025-12-11 18:11:46,928 - INFO - Fold 3 - Balanced Accuracy: 0.6356, AUROC: 0.6975, PR AUC: 0.9166
2025-12-11 18:11:47,179 - INFO - --- Fold 4 Inference ---
  model = torch.load(
Fold 4: 100%|██████████| 746/746 [00:29<00:00, 25.23it/s]
2025-12-11 18:12:16,750 - INFO - Fold 4 - Bal

In [None]:
import os
import gc
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from tqdm import tqdm
import numpy as np
import pickle
import logging
from huggingface_hub import login
import timm
from torchvision import transforms

# Logging
log_file = "hoptimus0_linear_probe_sens_spec.log"
# force=True drops handlers that an earlier cell may have left on the root
# logger. Without it basicConfig() is a silent no-op on an already configured
# root logger, and this cell's log file would never be written.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[logging.FileHandler(log_file), logging.StreamHandler()],
    force=True,
)
logger = logging.getLogger(__name__)

# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hugging Face login
# The HF_TOKEN environment variable takes precedence so the secret does not
# have to be pasted into the notebook; the placeholder remains as fallback.
login(token=os.environ.get("HF_TOKEN", "Your HuggingFace Token Here"))  # replace with your token

# Load H-Optimus-0 model (frozen backbone, as in training)
# NOTE(review): init_values / dynamic_img_size are forwarded to timm as-is;
# presumably the values recommended for this checkpoint - confirm.
hoptimus_model = timm.create_model(
    "hf-hub:bioptimus/H-optimus-0",
    pretrained=True,
    init_values=1e-5,
    dynamic_img_size=False
).to(device).eval()

# Transform for H-Optimus-0: PIL image -> tensor, then per-channel
# normalisation with the fixed mean/std below.
hoptimus_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(
        mean=(0.707223, 0.578729, 0.703617),
        std=(0.211883, 0.230117, 0.177517)
    )
])

# Embedding extractor (same as in training)
def extract_embedding(img_path):
    """Return the H-Optimus-0 output for a single image file.

    The image is converted to RGB, resized to 224x224, passed through
    ``hoptimus_transform`` and then through ``hoptimus_model``.

    Args:
        img_path: Path of the image to embed.

    Returns:
        The model output with the batch axis squeezed out, moved to the CPU.
    """
    # Close the underlying file deterministically; a bare Image.open() can
    # keep the handle open (e.g. for multi-frame files).
    with Image.open(img_path) as raw:
        image = raw.convert("RGB").resize((224, 224))
    tensor = hoptimus_transform(image).unsqueeze(0).to(device)
    # Mixed precision is only meaningful on CUDA. Disabling it elsewhere keeps
    # the same numerics as before but avoids the "CUDA is not available"
    # autocast warning on CPU-only machines.
    use_amp = device.type == "cuda"
    with torch.inference_mode(), torch.autocast(
        device_type="cuda", dtype=torch.float16, enabled=use_amp
    ):
        features = hoptimus_model(tensor)
    return features.squeeze(0).cpu()

# Dataset for inference
class InferenceDataset(Dataset):
    """Dataset yielding ``(embedding, label)`` pairs for a list of image files.

    Embeddings are produced by ``extract_embedding`` and memoised per index,
    so iterating the dataset again (it is traversed once per fold) does not
    push the same image through the backbone a second time.
    """

    def __init__(self, image_paths, labels):
        """Store the image paths and their labels (parallel sequences)."""
        self.image_paths = image_paths
        self.labels = labels
        # idx -> embedding already computed for that image.
        self._embedding_cache = {}

    def __len__(self):
        """Return the number of images."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return the embedding of image ``idx`` and its float32 label.

        The label tensor gets one leading axis via ``unsqueeze(0)``.
        """
        embedding = self._embedding_cache.get(idx)
        if embedding is None:
            embedding = extract_embedding(self.image_paths[idx])
            self._embedding_cache[idx] = embedding
        label = torch.tensor(self.labels[idx], dtype=torch.float32).unsqueeze(0)
        return embedding, label

# Classifier head: 1-layer linear probe (matches training script)
class HoptimusBinaryClassifier(nn.Module):
    """Linear probe: one ``nn.Linear`` layer mapping an embedding to one logit.

    Args:
        embed_dim: Size of the input embedding. Defaults to 1536, the value
            that used to be hard-coded.
    """

    def __init__(self, embed_dim=1536):
        super().__init__()
        self.classifier = nn.Linear(embed_dim, 1)

    def forward(self, x):
        """Return the linear layer's output for ``x`` (no activation applied)."""
        return self.classifier(x)

# Load test data
test_root = "/data/MELBA-AmiBr/Datasets_Stratified/AtNorM-Br"
class_map = {"Atypical": 0, "Normal": 1}
image_paths, labels = [], []

# One sub-directory per class; every image in it receives that class's label.
for label_name, label_val in class_map.items():
    class_dir = os.path.join(test_root, label_name)
    # os.listdir() returns entries in arbitrary order. Sorting keeps the
    # sample order, and therefore the saved per-sample predictions,
    # reproducible across runs and machines.
    for fname in sorted(os.listdir(class_dir)):
        if fname.lower().endswith((".jpg", ".jpeg", ".png", ".tif")):
            image_paths.append(os.path.join(class_dir, fname))
            labels.append(label_val)

true_labels = np.array(labels)
test_dataset = InferenceDataset(image_paths, labels)
# Batches of one, in listing order, loaded in the main process.
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=0)

# Output setup
fold_probs_dict = {}  # "fold_<k>" -> probabilities / predictions / metrics

# Per-class sensitivity/specificity across folds
fold_sens_atypical = []
fold_spec_atypical = []
fold_sens_normal = []
fold_spec_normal = []

def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is not positive."""
    return numerator / denominator if denominator > 0 else 0.0


# Inference loop over 5 folds
for fold in range(1, 6):
    logger.info(f"--- Fold {fold} Inference ---")

    # The checkpoint is a whole pickled module, hence weights_only=False.
    # Unpickling can execute arbitrary code: only load trusted files.
    checkpoint_path = f"hoptimus0_linear_probe_fold_{fold}_best.pth"
    model = torch.load(checkpoint_path, map_location=device, weights_only=False)
    model.to(device).eval()

    # One sigmoid probability per batch; .item() needs a single-element batch.
    fold_probs = []
    with torch.no_grad():
        for embeddings, _ in tqdm(test_loader, desc=f"Fold {fold}"):
            logits = model(embeddings.to(device))
            fold_probs.append(torch.sigmoid(logits).squeeze().cpu().item())

    fold_probs = np.array(fold_probs)
    fold_preds = (fold_probs > 0.5).astype(int)

    # Boolean masks for the four prediction / ground-truth combinations.
    pred_is_atyp, pred_is_norm = fold_preds == 0, fold_preds == 1
    true_is_atyp, true_is_norm = true_labels == 0, true_labels == 1

    # Atypical (class 0) as positive
    tp_atyp = np.sum(pred_is_atyp & true_is_atyp)
    fn_atyp = np.sum(pred_is_norm & true_is_atyp)
    tn_atyp = np.sum(pred_is_norm & true_is_norm)
    fp_atyp = np.sum(pred_is_atyp & true_is_norm)

    sens_atypical = _safe_ratio(tp_atyp, tp_atyp + fn_atyp)
    spec_atypical = _safe_ratio(tn_atyp, tn_atyp + fp_atyp)

    # Normal (class 1) as positive: the same four counts with roles swapped.
    tp_norm, fn_norm, tn_norm, fp_norm = tn_atyp, fp_atyp, tp_atyp, fn_atyp

    sens_normal = _safe_ratio(tp_norm, tp_norm + fn_norm)
    spec_normal = _safe_ratio(tn_norm, tn_norm + fp_norm)

    logger.info(
        f"Fold {fold} - "
        f"Atypical (0): Sensitivity={sens_atypical:.4f}, Specificity={spec_atypical:.4f} | "
        f"Normal (1): Sensitivity={sens_normal:.4f}, Specificity={spec_normal:.4f}"
    )

    # Record this fold's results in the cross-fold accumulators.
    for history, value in (
        (fold_sens_atypical, sens_atypical),
        (fold_spec_atypical, spec_atypical),
        (fold_sens_normal, sens_normal),
        (fold_spec_normal, spec_normal),
    ):
        history.append(value)

    fold_probs_dict[f"fold_{fold}"] = {
        "probs": fold_probs.tolist(),
        "preds": fold_preds.tolist(),
        "true_labels": true_labels.tolist(),
        "sens_atypical": sens_atypical,
        "spec_atypical": spec_atypical,
        "sens_normal": sens_normal,
        "spec_normal": spec_normal,
    }

    # Drop this fold's model before the next one is loaded.
    del model
    gc.collect()
    if device.type == "cuda":
        torch.cuda.empty_cache()

def _mean_std(values):
    """Format the mean and standard deviation of ``values`` as 'mean ± std'."""
    return f"{np.mean(values):.4f} ± {np.std(values):.4f}"


# Summary: mean ± standard deviation over the folds, per class.
logger.info("\n--- Final Summary (H-Optimus-0 Linear Probing, threshold=0.5) ---")
logger.info(
    "Atypical (class 0) - Sensitivity: "
    f"{_mean_std(fold_sens_atypical)}, "
    "Specificity: "
    f"{_mean_std(fold_spec_atypical)}"
)
logger.info(
    "Normal (class 1)   - Sensitivity: "
    f"{_mean_std(fold_sens_normal)}, "
    "Specificity: "
    f"{_mean_std(fold_spec_normal)}"
)

# Save predictions + metrics
predictions_path = "hoptimus0_atnorm-br_test_predictions_sens_spec.pkl"
with open(predictions_path, "wb") as f:
    pickle.dump(fold_probs_dict, f)

logger.info(f"Saved prediction results to {predictions_path}")


2025-12-12 15:07:48,447 - INFO - Loading pretrained weights from Hugging Face hub (bioptimus/H-optimus-0)
2025-12-12 15:07:52,683 - INFO - --- Fold 1 Inference ---
Fold 1: 100%|██████████| 746/746 [00:29<00:00, 25.25it/s]
2025-12-12 15:08:22,232 - INFO - Fold 1 - Atypical (0): Sensitivity=0.5078, Specificity=0.7152 | Normal (1): Sensitivity=0.7152, Specificity=0.5078
2025-12-12 15:08:22,413 - INFO - --- Fold 2 Inference ---
Fold 2: 100%|██████████| 746/746 [00:29<00:00, 25.50it/s]
2025-12-12 15:08:51,672 - INFO - Fold 2 - Atypical (0): Sensitivity=0.6562, Specificity=0.6553 | Normal (1): Sensitivity=0.6553, Specificity=0.6562
2025-12-12 15:08:51,852 - INFO - --- Fold 3 Inference ---
Fold 3: 100%|██████████| 746/746 [00:29<00:00, 25.28it/s]
2025-12-12 15:09:21,366 - INFO - Fold 3 - Atypical (0): Sensitivity=0.5625, Specificity=0.7087 | Normal (1): Sensitivity=0.7087, Specificity=0.5625
2025-12-12 15:09:21,546 - INFO - --- Fold 4 Inference ---
Fold 4: 100%|██████████| 746/746 [00:29<00:0

## AtNorM-MD

In [None]:
import os
import gc
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from tqdm import tqdm
import numpy as np
import pickle
import logging
from sklearn.metrics import (
    balanced_accuracy_score,
    roc_auc_score,
    precision_recall_curve,
    average_precision_score
)
import matplotlib.pyplot as plt
from huggingface_hub import login
import timm
from torchvision import transforms
from scipy.interpolate import interp1d

# Logging
log_file = "hoptimus0_linear_probe_inference.log"
# force=True drops handlers that an earlier cell may have left on the root
# logger. Without it basicConfig() is a silent no-op on an already configured
# root logger, and this cell's log file would never be written.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[logging.FileHandler(log_file), logging.StreamHandler()],
    force=True,
)
logger = logging.getLogger(__name__)

# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hugging Face login
# The HF_TOKEN environment variable takes precedence so the secret does not
# have to be pasted into the notebook; the placeholder remains as fallback.
login(token=os.environ.get("HF_TOKEN", "Your HuggingFace Token Here"))  # replace with your token

# Load H-Optimus-0 model
# NOTE(review): init_values / dynamic_img_size are forwarded to timm as-is;
# presumably the values recommended for this checkpoint - confirm.
hoptimus_model = timm.create_model(
    "hf-hub:bioptimus/H-optimus-0",
    pretrained=True,
    init_values=1e-5,
    dynamic_img_size=False
).to(device).eval()

# Transform for H-Optimus-0: PIL image -> tensor, then per-channel
# normalisation with the fixed mean/std below.
hoptimus_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(
        mean=(0.707223, 0.578729, 0.703617),
        std=(0.211883, 0.230117, 0.177517)
    )
])

# Embedding extractor
def extract_embedding(img_path):
    """Return the H-Optimus-0 output for a single image file.

    The image is converted to RGB, resized to 224x224, passed through
    ``hoptimus_transform`` and then through ``hoptimus_model``.

    Args:
        img_path: Path of the image to embed.

    Returns:
        The model output with the batch axis squeezed out, moved to the CPU.
    """
    # Close the underlying file deterministically; a bare Image.open() can
    # keep the handle open (e.g. for multi-frame files).
    with Image.open(img_path) as raw:
        image = raw.convert("RGB").resize((224, 224))
    tensor = hoptimus_transform(image).unsqueeze(0).to(device)
    # Mixed precision is only meaningful on CUDA. Disabling it elsewhere keeps
    # the same numerics as before but avoids the "CUDA is not available"
    # autocast warning on CPU-only machines.
    use_amp = device.type == "cuda"
    with torch.inference_mode(), torch.autocast(
        device_type="cuda", dtype=torch.float16, enabled=use_amp
    ):
        features = hoptimus_model(tensor)
    return features.squeeze(0).cpu()

# Dataset
class InferenceDataset(Dataset):
    """Dataset yielding ``(embedding, label)`` pairs for a list of image files.

    Embeddings are produced by ``extract_embedding`` and memoised per index,
    so iterating the dataset again (it is traversed once per fold) does not
    push the same image through the backbone a second time.
    """

    def __init__(self, image_paths, labels):
        """Store the image paths and their labels (parallel sequences)."""
        self.image_paths = image_paths
        self.labels = labels
        # idx -> embedding already computed for that image.
        self._embedding_cache = {}

    def __len__(self):
        """Return the number of images."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return the embedding of image ``idx`` and its float32 label.

        The label tensor gets one leading axis via ``unsqueeze(0)``.
        """
        embedding = self._embedding_cache.get(idx)
        if embedding is None:
            embedding = extract_embedding(self.image_paths[idx])
            self._embedding_cache[idx] = embedding
        label = torch.tensor(self.labels[idx], dtype=torch.float32).unsqueeze(0)
        return embedding, label

# Classifier head: 1-layer linear probe
class HoptimusBinaryClassifier(nn.Module):
    """Linear probe: one fully connected layer mapping 1536 features to 1 output."""

    def __init__(self):
        super().__init__()
        # Attribute name is kept as ``classifier`` so existing checkpoints
        # that reference it stay loadable.
        self.classifier = nn.Linear(in_features=1536, out_features=1)

    def forward(self, x):
        out = self.classifier(x)
        return out

# Load test data
# Each class has its own sub-directory under ``test_root``; the directory
# name selects the integer label through ``class_map``.
test_root = "/data/MELBA-AmiBr/Datasets_Stratified/AtNorM-MD"
class_map = {"Atypical": 0, "Normal": 1}
image_paths, labels = [], []

for label_name, label_val in class_map.items():
    class_dir = os.path.join(test_root, label_name)
    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order (and therefore the saved per-sample predictions) reproducible
    # across runs and machines.
    for fname in sorted(os.listdir(class_dir)):
        if fname.lower().endswith(('.jpg', '.jpeg', '.png', '.tif')):
            image_paths.append(os.path.join(class_dir, fname))
            labels.append(label_val)

# Ground-truth labels in the same order as ``image_paths``.
true_labels = np.array(labels)
test_dataset = InferenceDataset(image_paths, labels)
# shuffle=False keeps loader order aligned with ``true_labels``; batch_size=1
# because the fold loop reads a single probability per batch.
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=0)

# Output setup
os.makedirs("pr_curves", exist_ok=True)
fold_probs_dict = {}
fold_bal_accs, fold_aurocs, fold_pr_aucs = [], [], []
all_precisions, all_recalls = [], []

# The backbone is the same for every fold, so each test image is embedded
# once here and the cached batches are reused by all five probes instead of
# re-running H-Optimus-0 inside every fold.
cached_embeddings = [
    embeddings
    for embeddings, _ in tqdm(test_loader, desc="Extracting embeddings")
]

# Inference loop: evaluate each of the five fold checkpoints on the test set.
for fold in range(1, 6):
    logger.info(f"--- Fold {fold} Inference ---")

    # The checkpoint is a whole pickled module (it is called directly below),
    # not a state_dict, so it must be loaded with weights_only=False; newer
    # PyTorch releases default to weights_only=True and would refuse it.
    # NOTE: full unpickling executes arbitrary code - only load trusted files.
    model = torch.load(
        f"hoptimus0_linear_probe_fold_{fold}_best.pth",
        map_location=device,
        weights_only=False,
    )
    model.to(device).eval()

    fold_probs = []
    with torch.no_grad():
        for embeddings in tqdm(cached_embeddings, desc=f"Fold {fold}"):
            embeddings = embeddings.to(device)
            logits = model(embeddings)
            # Batches hold one sample, so the sigmoid output is one scalar.
            prob = torch.sigmoid(logits).squeeze().cpu().item()
            fold_probs.append(prob)

    fold_probs = np.array(fold_probs)
    # Hard decision at a fixed 0.5 probability threshold.
    fold_preds = (fold_probs > 0.5).astype(int)

    # The ranking metrics use the probabilities directly; scikit-learn treats
    # label 1 as the positive class by default.
    bal_acc = balanced_accuracy_score(true_labels, fold_preds)
    auroc = roc_auc_score(true_labels, fold_probs)
    precision, recall, _ = precision_recall_curve(true_labels, fold_probs)
    pr_auc = average_precision_score(true_labels, fold_probs)

    logger.info(
        f"Fold {fold} - Balanced Accuracy: {bal_acc:.4f}, "
        f"AUROC: {auroc:.4f}, PR AUC: {pr_auc:.4f}"
    )
    fold_bal_accs.append(bal_acc)
    fold_aurocs.append(auroc)
    fold_pr_aucs.append(pr_auc)
    all_precisions.append(precision)
    all_recalls.append(recall)

    fold_probs_dict[f"fold_{fold}"] = {
        "probs": fold_probs.tolist(),
        "preds": fold_preds.tolist(),
        "true_labels": true_labels.tolist()
    }

    # PR curve
    plt.figure()
    plt.plot(recall, precision, label=f"Fold {fold} (AP = {pr_auc:.4f})")
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.title(f"Precision-Recall Curve - Fold {fold}")
    plt.grid(True)
    plt.legend()
    plt.savefig(f"pr_curves/hoptimus0_atnorm-md_pr_curve_fold_{fold}.png")
    plt.close()

    # Drop this fold's probe before loading the next one.
    del model
    gc.collect()
    if device.type == "cuda":
        torch.cuda.empty_cache()

# Averaged PR curve
# Resample every fold's curve on one shared recall grid so the precisions can
# be averaged point-wise. The per-fold arrays are reversed before
# interpolation; recall values outside a fold's range map to precision 0.0.
rec_uniform = np.linspace(0, 1, 1000)
interp_prec_list = [
    interp1d(rec[::-1], prec[::-1], bounds_error=False, fill_value=0.0)(rec_uniform)
    for prec, rec in zip(all_precisions, all_recalls)
]

mean_precision = np.mean(interp_prec_list, axis=0)

# The legend reports the mean of the per-fold average-precision scores.
fig, ax = plt.subplots()
ax.plot(
    rec_uniform,
    mean_precision,
    label=f"Mean PR (Avg AUC = {np.mean(fold_pr_aucs):.4f})",
)
ax.set_xlabel("Recall")
ax.set_ylabel("Precision")
ax.set_title("Average Precision-Recall Curve - H-Optimus-0 Linear Probing")
ax.grid(True)
ax.legend()
fig.savefig("pr_curves/hoptimus0_atnorm-md_pr_curve_average.png")
plt.close(fig)

# Summary: mean ± standard deviation of each metric over the folds.
logger.info("\n--- Final Summary (H-Optimus-0 Linear Probing) ---")
for metric_name, fold_values in (
    ("Balanced Accuracy", fold_bal_accs),
    ("AUROC", fold_aurocs),
    ("PR AUC", fold_pr_aucs),
):
    logger.info(
        f"{metric_name}: {np.mean(fold_values):.4f} ± {np.std(fold_values):.4f}"
    )

# Save predictions: per-fold probabilities, hard predictions and true labels.
predictions_path = "hoptimus0_atnorm-md_test_predictions.pkl"
with open(predictions_path, "wb") as f:
    pickle.dump(fold_probs_dict, f)

logger.info(f"Saved prediction results to {predictions_path}")


2025-12-11 18:14:28,741 - INFO - Loading pretrained weights from Hugging Face hub (bioptimus/H-optimus-0)
2025-12-11 18:14:32,848 - INFO - --- Fold 1 Inference ---
  model = torch.load(
Fold 1: 100%|██████████| 2107/2107 [01:23<00:00, 25.30it/s]
2025-12-11 18:15:56,124 - INFO - Fold 1 - Balanced Accuracy: 0.5537, AUROC: 0.6051, PR AUC: 0.9267
2025-12-11 18:15:56,377 - INFO - --- Fold 2 Inference ---
  model = torch.load(
Fold 2: 100%|██████████| 2107/2107 [01:23<00:00, 25.36it/s]
2025-12-11 18:17:19,462 - INFO - Fold 2 - Balanced Accuracy: 0.6010, AUROC: 0.6297, PR AUC: 0.9297
2025-12-11 18:17:19,715 - INFO - --- Fold 3 Inference ---
  model = torch.load(
Fold 3: 100%|██████████| 2107/2107 [01:23<00:00, 25.24it/s]
2025-12-11 18:18:43,188 - INFO - Fold 3 - Balanced Accuracy: 0.5956, AUROC: 0.6297, PR AUC: 0.9287
2025-12-11 18:18:43,450 - INFO - --- Fold 4 Inference ---
  model = torch.load(
Fold 4: 100%|██████████| 2107/2107 [01:23<00:00, 25.09it/s]
2025-12-11 18:20:07,438 - INFO - Fold

In [None]:
import os
import gc
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from tqdm import tqdm
import numpy as np
import pickle
import logging
from huggingface_hub import login
import timm
from torchvision import transforms

# Logging: every record is written to a log file and echoed to the console.
log_file = "hoptimus0_linear_probe_sens_spec.log"
_log_handlers = [logging.FileHandler(log_file), logging.StreamHandler()]
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=_log_handlers,
)
logger = logging.getLogger(__name__)

# Run on the GPU when PyTorch can see one, otherwise on the CPU.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)

# Hugging Face login (the placeholder below must be replaced by a real token).
login(token="Your HuggingFace Token Here")  # replace with your token

# Load the pretrained H-Optimus-0 model from the Hub, move it to the selected
# device and switch it to evaluation mode.
hoptimus_model = timm.create_model(
    "hf-hub:bioptimus/H-optimus-0",
    pretrained=True,
    init_values=1e-5,
    dynamic_img_size=False,
)
hoptimus_model = hoptimus_model.to(device).eval()

# Transform for H-Optimus-0: PIL image -> tensor, then per-channel
# normalisation with the constants below.
_HOPTIMUS_MEAN = (0.707223, 0.578729, 0.703617)
_HOPTIMUS_STD = (0.211883, 0.230117, 0.177517)
hoptimus_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=_HOPTIMUS_MEAN, std=_HOPTIMUS_STD),
])

# Embedding extractor (same as in training)
def extract_embedding(img_path):
    """Return the H-Optimus-0 output for a single image file.

    The image is read as RGB, resized to 224x224, passed through
    ``hoptimus_transform`` and fed to ``hoptimus_model`` as a batch of one.

    Args:
        img_path: Path of the image file to embed.

    Returns:
        The model output with the leading batch dimension squeezed out,
        moved to the CPU.
    """
    # Image.open is lazy and may keep the file handle open (notably for
    # multi-frame capable formats such as TIFF); the context manager closes
    # it as soon as the converted/resized copy has been produced.
    with Image.open(img_path) as img:
        image = img.convert("RGB").resize((224, 224))
    tensor = hoptimus_transform(image).unsqueeze(0).to(device)

    if device.type == "cuda":
        # Mixed precision is only meaningful on the GPU.
        with torch.inference_mode(), torch.autocast(device_type="cuda", dtype=torch.float16):
            features = hoptimus_model(tensor)
    else:
        # On CPU, requesting CUDA autocast only produces a warning and is
        # disabled anyway, so run the plain forward pass.
        with torch.inference_mode():
            features = hoptimus_model(tensor)
    return features.squeeze(0).cpu()

# Dataset for inference
class InferenceDataset(Dataset):
    """Dataset yielding ``(embedding, label)`` pairs for inference.

    Embeddings are not stored: each access calls ``extract_embedding`` on the
    image path at the requested index.
    """

    def __init__(self, image_paths, labels):
        # Parallel sequences: labels[i] belongs to image_paths[i].
        self.image_paths, self.labels = image_paths, labels

    def __len__(self):
        # One sample per image path.
        return len(self.image_paths)

    def __getitem__(self, idx):
        features = extract_embedding(self.image_paths[idx])
        # Label as a float32 tensor with a leading dimension of size one.
        target = torch.tensor(self.labels[idx], dtype=torch.float32)
        return features, target.unsqueeze(0)

# Classifier head: 1-layer linear probe (matches training script)
class HoptimusBinaryClassifier(nn.Module):
    """Linear probe: one fully connected layer mapping 1536 features to 1 output."""

    def __init__(self):
        super().__init__()
        # Attribute name is kept as ``classifier`` so existing checkpoints
        # that reference it stay loadable.
        self.classifier = nn.Linear(in_features=1536, out_features=1)

    def forward(self, x):
        out = self.classifier(x)
        return out

# Load test data
# Each class has its own sub-directory under ``test_root``; the directory
# name selects the integer label through ``class_map``.
test_root = "/data/MELBA-AmiBr/Datasets_Stratified/AtNorM-MD"
class_map = {"Atypical": 0, "Normal": 1}
image_paths, labels = [], []

for label_name, label_val in class_map.items():
    class_dir = os.path.join(test_root, label_name)
    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order (and therefore the saved per-sample predictions) reproducible
    # across runs and machines.
    for fname in sorted(os.listdir(class_dir)):
        if fname.lower().endswith((".jpg", ".jpeg", ".png", ".tif")):
            image_paths.append(os.path.join(class_dir, fname))
            labels.append(label_val)

# Ground-truth labels in the same order as ``image_paths``.
true_labels = np.array(labels)
test_dataset = InferenceDataset(image_paths, labels)
# shuffle=False keeps loader order aligned with ``true_labels``; batch_size=1
# because the fold loop reads a single probability per batch.
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=0)

# Output setup
fold_probs_dict = {}

# Per-class sensitivity/specificity across folds
fold_sens_atypical = []
fold_spec_atypical = []
fold_sens_normal = []
fold_spec_normal = []

# The backbone is the same for every fold, so each test image is embedded
# once here and the cached batches are reused by all five probes instead of
# re-running H-Optimus-0 inside every fold.
cached_embeddings = [
    embeddings
    for embeddings, _ in tqdm(test_loader, desc="Extracting embeddings")
]

# Inference loop over 5 folds
for fold in range(1, 6):
    logger.info(f"--- Fold {fold} Inference ---")

    # Load full model saved during training (with 1-layer linear head).
    # weights_only=False is required because the file holds a pickled module
    # rather than a state_dict. NOTE: full unpickling executes arbitrary
    # code - only load trusted files.
    model = torch.load(
        f"hoptimus0_linear_probe_fold_{fold}_best.pth",
        map_location=device,
        weights_only=False
    )
    model.to(device).eval()

    fold_probs = []
    with torch.no_grad():
        for embeddings in tqdm(cached_embeddings, desc=f"Fold {fold}"):
            embeddings = embeddings.to(device)
            logits = model(embeddings)
            # Batches hold one sample, so the sigmoid output is one scalar.
            prob = torch.sigmoid(logits).squeeze().cpu().item()
            fold_probs.append(prob)

    fold_probs = np.array(fold_probs)
    # Hard decision at a fixed 0.5 probability threshold.
    fold_preds = (fold_probs > 0.5).astype(int)

    # Atypical (class 0) as positive
    tp_atyp = np.sum((fold_preds == 0) & (true_labels == 0))
    fn_atyp = np.sum((fold_preds == 1) & (true_labels == 0))
    tn_atyp = np.sum((fold_preds == 1) & (true_labels == 1))
    fp_atyp = np.sum((fold_preds == 0) & (true_labels == 1))

    # Guard against an empty class so the ratio never divides by zero.
    sens_atypical = tp_atyp / (tp_atyp + fn_atyp) if (tp_atyp + fn_atyp) > 0 else 0.0
    spec_atypical = tn_atyp / (tn_atyp + fp_atyp) if (tn_atyp + fp_atyp) > 0 else 0.0

    # Normal (class 1) as positive: with two classes the confusion counts are
    # the same four numbers with the roles swapped (TP<->TN, FN<->FP), so the
    # sensitivity of one class is exactly the specificity of the other.
    sens_normal = spec_atypical
    spec_normal = sens_atypical

    logger.info(
        f"Fold {fold} - "
        f"Atypical (0): Sensitivity={sens_atypical:.4f}, Specificity={spec_atypical:.4f} | "
        f"Normal (1): Sensitivity={sens_normal:.4f}, Specificity={spec_normal:.4f}"
    )

    fold_sens_atypical.append(sens_atypical)
    fold_spec_atypical.append(spec_atypical)
    fold_sens_normal.append(sens_normal)
    fold_spec_normal.append(spec_normal)

    fold_probs_dict[f"fold_{fold}"] = {
        "probs": fold_probs.tolist(),
        "preds": fold_preds.tolist(),
        "true_labels": true_labels.tolist(),
        "sens_atypical": sens_atypical,
        "spec_atypical": spec_atypical,
        "sens_normal": sens_normal,
        "spec_normal": spec_normal,
    }

    # Drop this fold's probe before loading the next one.
    del model
    gc.collect()
    if device.type == "cuda":
        torch.cuda.empty_cache()

# Summary: mean ± standard deviation of the per-fold values for each class.
logger.info("\n--- Final Summary (H-Optimus-0 Linear Probing, threshold=0.5) ---")
for class_prefix, sens_values, spec_values in (
    ("Atypical (class 0) - ", fold_sens_atypical, fold_spec_atypical),
    ("Normal (class 1)   - ", fold_sens_normal, fold_spec_normal),
):
    sens_text = f"{np.mean(sens_values):.4f} ± {np.std(sens_values):.4f}"
    spec_text = f"{np.mean(spec_values):.4f} ± {np.std(spec_values):.4f}"
    logger.info(
        f"{class_prefix}Sensitivity: {sens_text}, Specificity: {spec_text}"
    )

# Save predictions + metrics (one entry per fold).
predictions_path = "hoptimus0_atnorm-md_test_predictions_sens_spec.pkl"
with open(predictions_path, "wb") as f:
    pickle.dump(fold_probs_dict, f)

logger.info(f"Saved prediction results to {predictions_path}")


2025-12-12 15:12:27,504 - INFO - Loading pretrained weights from Hugging Face hub (bioptimus/H-optimus-0)
2025-12-12 15:12:31,594 - INFO - --- Fold 1 Inference ---
Fold 1: 100%|██████████| 2107/2107 [01:22<00:00, 25.55it/s]
2025-12-12 15:13:54,077 - INFO - Fold 1 - Atypical (0): Sensitivity=0.3379, Specificity=0.7696 | Normal (1): Sensitivity=0.7696, Specificity=0.3379
2025-12-12 15:13:54,247 - INFO - --- Fold 2 Inference ---
Fold 2: 100%|██████████| 2107/2107 [01:22<00:00, 25.60it/s]
2025-12-12 15:15:16,548 - INFO - Fold 2 - Atypical (0): Sensitivity=0.5845, Specificity=0.6176 | Normal (1): Sensitivity=0.6176, Specificity=0.5845
2025-12-12 15:15:16,759 - INFO - --- Fold 3 Inference ---
Fold 3: 100%|██████████| 2107/2107 [01:56<00:00, 18.08it/s]
2025-12-12 15:17:13,284 - INFO - Fold 3 - Atypical (0): Sensitivity=0.4932, Specificity=0.6981 | Normal (1): Sensitivity=0.6981, Specificity=0.4932
2025-12-12 15:17:13,464 - INFO - --- Fold 4 Inference ---
Fold 4: 100%|██████████| 2107/2107 [01