## AMi-Br Test Set

In [None]:
import os
import gc
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from tqdm import tqdm
import numpy as np
import pickle
import logging
from sklearn.metrics import (
    balanced_accuracy_score,
    roc_auc_score,
    precision_recall_curve,
    average_precision_score
)
import matplotlib.pyplot as plt
from huggingface_hub import login
import timm
from torchvision import transforms
from scipy.interpolate import interp1d

# Logging setup
log_file = "hoptimus1_linear_probe_inference.log"
# force=True: basicConfig() does nothing when the root logger already has
# handlers (e.g. from a previously executed cell), so without it this cell's
# log file would never be opened in a shared kernel session.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[logging.FileHandler(log_file), logging.StreamHandler()],
    force=True,
)
logger = logging.getLogger(__name__)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hugging Face login and model load
# Read the token from the HF_TOKEN environment variable so a real credential
# does not have to be pasted into the notebook; the placeholder is kept as the
# fallback for backward compatibility.
login(token=os.environ.get("HF_TOKEN", "hf_xxx"))  # Replace with your actual token
hoptimus_model = timm.create_model(
    "hf-hub:bioptimus/H-optimus-1",
    pretrained=True,
    init_values=1e-5,
    dynamic_img_size=False
).to(device).eval()  # evaluation mode: the backbone is only used for inference

# Transform
# NOTE(review): mean/std are presumably the statistics published with the
# H-optimus model - confirm against the model card.
hoptimus_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(
        mean=(0.707223, 0.578729, 0.703617),
        std=(0.211883, 0.230117, 0.177517)
    )
])

# Feature extractor
def extract_embedding(img_path):
    """Return the backbone feature vector for a single image file.

    The image is converted to RGB, resized to 224x224, passed through
    ``hoptimus_transform`` and then through ``hoptimus_model`` without
    gradient tracking.

    Args:
        img_path: Path of an image file that PIL can open.

    Returns:
        A float32 CPU tensor with the leading batch dimension removed.
    """
    # The context manager closes the underlying file deterministically.
    with Image.open(img_path) as img:
        image = img.convert("RGB").resize((224, 224))
    tensor = hoptimus_transform(image).unsqueeze(0).to(device)
    # Only enable CUDA autocast when actually running on CUDA; requesting it
    # on a CPU-only machine makes PyTorch warn and disable it on every call.
    use_amp = device.type == "cuda"
    with torch.no_grad(), torch.autocast(
        device_type="cuda", dtype=torch.float16, enabled=use_amp
    ):
        features = hoptimus_model(tensor)
    # Cast explicitly so the linear probe never receives half precision.
    return features.squeeze(0).float().cpu()

# Dataset
class InferenceDataset(Dataset):
    """Map-style dataset yielding ``(embedding, label)`` pairs for image files.

    Embeddings are computed on first access and memoised per index, so
    iterating the dataset once per fold does not re-run the feature extractor
    for images that were already embedded.

    Args:
        image_paths: Sequence of image file paths.
        labels: Sequence of labels aligned with ``image_paths``.
        embed_fn: Optional callable mapping a path to its embedding. When
            omitted, the module-level ``extract_embedding`` is used.
    """

    def __init__(self, image_paths, labels, embed_fn=None):
        self.image_paths = image_paths
        self.labels = labels
        self.embed_fn = embed_fn
        # index -> embedding. The cache lives in whichever process calls
        # __getitem__; DataLoader worker processes would each keep their own.
        self._cache = {}

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        if idx not in self._cache:
            # Resolve the default at call time so the class can be defined
            # before extract_embedding is.
            embed = self.embed_fn if self.embed_fn is not None else extract_embedding
            self._cache[idx] = embed(self.image_paths[idx])
        return self._cache[idx], self.labels[idx]

# Classifier head (pure linear probe: 1 layer)
class HoptimusBinaryClassifier(nn.Module):
    """Linear probe: a single linear layer producing one logit per input.

    NOTE(review): the fold checkpoints are loaded as whole modules and are
    presumably instances of this class, so the class name and the
    ``classifier`` attribute name should stay unchanged.

    Args:
        in_features: Size of the input embedding. Defaults to 1536, the value
            that used to be hard-coded.
    """

    def __init__(self, in_features=1536):
        super().__init__()
        self.classifier = nn.Linear(in_features, 1)

    def forward(self, x):
        """Return the raw logit(s); no sigmoid is applied here."""
        return self.classifier(x)

# Load test images
# Each class lives in its own sub-directory of test_root; the directory name
# determines the integer label.
test_root = "/data/MELBA-AmiBr/Datasets_Stratified/AMi-Br/Test"
class_map = {"Atypical": 0, "Normal": 1}
image_paths, labels = [], []

for label_name, label_val in class_map.items():
    class_dir = os.path.join(test_root, label_name)
    # os.listdir() returns entries in arbitrary order; sort them so the
    # prediction order is reproducible across runs and machines.
    for fname in sorted(os.listdir(class_dir)):
        if fname.lower().endswith(('.jpg', '.jpeg', '.png', '.tif')):
            image_paths.append(os.path.join(class_dir, fname))
            labels.append(label_val)

# Fail early with a clear message instead of deep inside the metric code.
if not image_paths:
    raise RuntimeError(f"No test images found under {test_root}")

true_labels = np.array(labels)
test_dataset = InferenceDataset(image_paths, labels)
# shuffle=False keeps predictions aligned with true_labels; one image per
# batch because the evaluation loop reduces each batch to a single scalar.
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=0)

# Output setup
os.makedirs("pr_curves", exist_ok=True)
fold_probs_dict = {}
fold_bal_accs, fold_aurocs, fold_pr_aucs = [], [], []
all_precisions, all_recalls = [], []

# Evaluate each fold
for fold in range(1, 6):
    logger.info(f"--- Fold {fold} Inference ---")

    # The checkpoint is used directly as a module, i.e. it is a whole pickled
    # model rather than a state dict. Newer PyTorch releases default
    # torch.load to weights_only=True, which rejects such files, so opt out
    # explicitly. Only load checkpoint files from a trusted source.
    model = torch.load(
        f"hoptimus1_linear_probe_fold_{fold}_best.pth",
        map_location=device,
        weights_only=False,
    )
    model.to(device).eval()

    fold_probs = []
    with torch.no_grad():
        for embeddings, _ in tqdm(test_loader, desc=f"Fold {fold}"):
            embeddings = embeddings.to(device)
            logits = model(embeddings)
            # One image per batch, so the sigmoid output reduces to a scalar.
            prob = torch.sigmoid(logits).squeeze().cpu().item()
            fold_probs.append(prob)

    fold_probs = np.array(fold_probs)
    # The score is treated as the probability of label 1; threshold at 0.5.
    fold_preds = (fold_probs > 0.5).astype(int)

    # The ranking metrics below use scikit-learn's default positive label,
    # which is 1 for 0/1 labels ("Normal" in class_map).
    bal_acc = balanced_accuracy_score(true_labels, fold_preds)
    auroc = roc_auc_score(true_labels, fold_probs)
    precision, recall, _ = precision_recall_curve(true_labels, fold_probs)
    pr_auc = average_precision_score(true_labels, fold_probs)

    logger.info(f"Fold {fold} - Balanced Accuracy: {bal_acc:.4f}, AUROC: {auroc:.4f}, PR AUC: {pr_auc:.4f}")
    fold_bal_accs.append(bal_acc)
    fold_aurocs.append(auroc)
    fold_pr_aucs.append(pr_auc)
    all_precisions.append(precision)
    all_recalls.append(recall)

    fold_probs_dict[f"fold_{fold}"] = {
        "probs": fold_probs.tolist(),
        "preds": fold_preds.tolist(),
        "true_labels": true_labels.tolist(),
        # Stored so each prediction can be traced back to its image file.
        "image_paths": list(image_paths),
    }

    # PR Curve
    plt.figure()
    plt.plot(recall, precision, label=f"Fold {fold} (AP = {pr_auc:.4f})")
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.title(f"PR Curve - Fold {fold}")
    plt.grid(True)
    plt.legend()
    plt.savefig(f"pr_curves/hoptimus1_amibr_pr_curve_fold_{fold}.png")
    plt.close()

    # Drop the fold's model before loading the next one.
    del model
    gc.collect()
    torch.cuda.empty_cache()

# Averaged PR Curve
# Resample every fold's curve onto one shared recall grid so the precisions
# can be averaged point-wise.
rec_uniform = np.linspace(0, 1, 1000)
interp_prec_list = [
    # Both arrays are reversed before being handed to interp1d.
    interp1d(rec[::-1], prec[::-1], bounds_error=False, fill_value=0.0)(rec_uniform)
    for prec, rec in zip(all_precisions, all_recalls)
]
mean_precision = np.mean(interp_prec_list, axis=0)

fig, ax = plt.subplots()
ax.plot(rec_uniform, mean_precision, label=f"Mean PR (Avg AUC = {np.mean(fold_pr_aucs):.4f})")
ax.set_xlabel("Recall")
ax.set_ylabel("Precision")
ax.set_title("Average PR Curve - H-Optimus-1 Linear Probing")
ax.grid(True)
ax.legend()
fig.savefig("pr_curves/hoptimus1_amibr_pr_curve_average.png")
plt.close(fig)

# Summary
# Mean and standard deviation (np.std default, ddof=0) across the folds.
logger.info("\n--- Final Summary (H-Optimus-1 Linear Probing) ---")
for metric_name, fold_values in (
    ("Balanced Accuracy", fold_bal_accs),
    ("AUROC", fold_aurocs),
    ("PR AUC", fold_pr_aucs),
):
    logger.info(f"{metric_name}: {np.mean(fold_values):.4f} ± {np.std(fold_values):.4f}")

# Persist the per-fold probabilities, predictions and labels.
with open("hoptimus1_amibr_test_predictions.pkl", "wb") as f:
    pickle.dump(fold_probs_dict, f)

logger.info("Saved prediction results to hoptimus1_amibr_test_predictions.pkl")


2025-12-11 19:55:30,478 - INFO - Loading pretrained weights from Hugging Face hub (bioptimus/H-optimus-1)
2025-12-11 19:55:30,745 - INFO - [bioptimus/H-optimus-1] Safe alternative available for 'pytorch_model.bin' (as 'model.safetensors'). Loading weights using safetensors.
2025-12-11 19:55:32,899 - INFO - --- Fold 1 Inference ---
  model = torch.load(f"hoptimus1_linear_probe_fold_{fold}_best.pth", map_location=device)
Fold 1: 100%|██████████| 826/826 [00:32<00:00, 25.64it/s]
2025-12-11 19:56:05,121 - INFO - Fold 1 - Balanced Accuracy: 0.5686, AUROC: 0.6224, PR AUC: 0.8506
2025-12-11 19:56:05,368 - INFO - --- Fold 2 Inference ---
  model = torch.load(f"hoptimus1_linear_probe_fold_{fold}_best.pth", map_location=device)
Fold 2: 100%|██████████| 826/826 [00:31<00:00, 25.88it/s]
2025-12-11 19:56:37,291 - INFO - Fold 2 - Balanced Accuracy: 0.5997, AUROC: 0.6589, PR AUC: 0.8643
2025-12-11 19:56:37,528 - INFO - --- Fold 3 Inference ---
  model = torch.load(f"hoptimus1_linear_probe_fold_{fold}

In [None]:
import os
import gc
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from tqdm import tqdm
import numpy as np
import pickle
import logging
from huggingface_hub import login
import timm
from torchvision import transforms

# Logging setup
log_file = "hoptimus1_linear_probe_sens_spec.log"
# force=True: basicConfig() does nothing when the root logger already has
# handlers (e.g. from a previously executed cell), so without it this cell's
# log file would never be opened in a shared kernel session.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[logging.FileHandler(log_file), logging.StreamHandler()],
    force=True,
)
logger = logging.getLogger(__name__)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hugging Face login and model load
# Read the token from the HF_TOKEN environment variable so a real credential
# does not have to be pasted into the notebook; the placeholder is kept as the
# fallback for backward compatibility.
login(token=os.environ.get("HF_TOKEN", "hf_xxx"))  # Replace with your actual token
hoptimus_model = timm.create_model(
    "hf-hub:bioptimus/H-optimus-1",
    pretrained=True,
    init_values=1e-5,
    dynamic_img_size=False
).to(device).eval()  # evaluation mode: the backbone is only used for inference

# Transform
# NOTE(review): mean/std are presumably the statistics published with the
# H-optimus model - confirm against the model card.
hoptimus_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(
        mean=(0.707223, 0.578729, 0.703617),
        std=(0.211883, 0.230117, 0.177517)
    )
])

# Feature extractor
def extract_embedding(img_path):
    """Return the backbone feature vector for a single image file.

    The image is converted to RGB, resized to 224x224, passed through
    ``hoptimus_transform`` and then through ``hoptimus_model`` without
    gradient tracking.

    Args:
        img_path: Path of an image file that PIL can open.

    Returns:
        A float32 CPU tensor with the leading batch dimension removed.
    """
    # The context manager closes the underlying file deterministically.
    with Image.open(img_path) as img:
        image = img.convert("RGB").resize((224, 224))
    tensor = hoptimus_transform(image).unsqueeze(0).to(device)
    # Only enable CUDA autocast when actually running on CUDA; requesting it
    # on a CPU-only machine makes PyTorch warn and disable it on every call.
    use_amp = device.type == "cuda"
    with torch.no_grad(), torch.autocast(
        device_type="cuda", dtype=torch.float16, enabled=use_amp
    ):
        features = hoptimus_model(tensor)
    # Cast explicitly so the linear probe never receives half precision.
    return features.squeeze(0).float().cpu()

# Dataset
class InferenceDataset(Dataset):
    """Map-style dataset yielding ``(embedding, label)`` pairs for image files.

    Embeddings are computed on first access and memoised per index, so
    iterating the dataset once per fold does not re-run the feature extractor
    for images that were already embedded.

    Args:
        image_paths: Sequence of image file paths.
        labels: Sequence of labels aligned with ``image_paths``.
        embed_fn: Optional callable mapping a path to its embedding. When
            omitted, the module-level ``extract_embedding`` is used.
    """

    def __init__(self, image_paths, labels, embed_fn=None):
        self.image_paths = image_paths
        self.labels = labels
        self.embed_fn = embed_fn
        # index -> embedding. The cache lives in whichever process calls
        # __getitem__; DataLoader worker processes would each keep their own.
        self._cache = {}

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        if idx not in self._cache:
            # Resolve the default at call time so the class can be defined
            # before extract_embedding is.
            embed = self.embed_fn if self.embed_fn is not None else extract_embedding
            self._cache[idx] = embed(self.image_paths[idx])
        return self._cache[idx], self.labels[idx]

# Classifier head (pure linear probe: 1 layer)
class HoptimusBinaryClassifier(nn.Module):
    """Linear probe: a single linear layer producing one logit per input.

    NOTE(review): the fold checkpoints are loaded as whole modules and are
    presumably instances of this class, so the class name and the
    ``classifier`` attribute name should stay unchanged.

    Args:
        in_features: Size of the input embedding. Defaults to 1536, the value
            that used to be hard-coded.
    """

    def __init__(self, in_features=1536):
        super().__init__()
        self.classifier = nn.Linear(in_features, 1)

    def forward(self, x):
        """Return the raw logit(s); no sigmoid is applied here."""
        return self.classifier(x)

# Load test images
# Each class lives in its own sub-directory of test_root; the directory name
# determines the integer label.
test_root = "/data/MELBA-AmiBr/Datasets_Stratified/AMi-Br/Test"
class_map = {"Atypical": 0, "Normal": 1}
image_paths, labels = [], []

for label_name, label_val in class_map.items():
    class_dir = os.path.join(test_root, label_name)
    # os.listdir() returns entries in arbitrary order; sort them so the
    # prediction order is reproducible across runs and machines.
    for fname in sorted(os.listdir(class_dir)):
        if fname.lower().endswith(('.jpg', '.jpeg', '.png', '.tif')):
            image_paths.append(os.path.join(class_dir, fname))
            labels.append(label_val)

# Fail early with a clear message instead of deep inside the metric code.
if not image_paths:
    raise RuntimeError(f"No test images found under {test_root}")

true_labels = np.array(labels)
test_dataset = InferenceDataset(image_paths, labels)
# shuffle=False keeps predictions aligned with true_labels; one image per
# batch because the evaluation loop reduces each batch to a single scalar.
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=0)

# Output setup
fold_probs_dict = {}

# Per-class sensitivity/specificity across folds
fold_sens_atypical = []
fold_spec_atypical = []
fold_sens_normal = []
fold_spec_normal = []

# Evaluate each fold
for fold in range(1, 6):
    logger.info(f"--- Fold {fold} Inference ---")

    # The checkpoint is used directly as a module, i.e. it is a whole pickled
    # model rather than a state dict. Newer PyTorch releases default
    # torch.load to weights_only=True, which rejects such files, so opt out
    # explicitly. Only load checkpoint files from a trusted source.
    model = torch.load(
        f"hoptimus1_linear_probe_fold_{fold}_best.pth",
        map_location=device,
        weights_only=False,
    )
    model.to(device).eval()

    fold_probs = []
    with torch.no_grad():
        for embeddings, _ in tqdm(test_loader, desc=f"Fold {fold}"):
            embeddings = embeddings.to(device)
            logits = model(embeddings)
            # One image per batch, so the sigmoid output reduces to a scalar.
            prob = torch.sigmoid(logits).squeeze().cpu().item()
            fold_probs.append(prob)

    fold_probs = np.array(fold_probs)
    # The score is treated as the probability of label 1; threshold at 0.5.
    fold_preds = (fold_probs > 0.5).astype(int)

    # Atypical (class 0) as positive
    tp_atyp = np.sum((fold_preds == 0) & (true_labels == 0))
    fn_atyp = np.sum((fold_preds == 1) & (true_labels == 0))
    tn_atyp = np.sum((fold_preds == 1) & (true_labels == 1))
    fp_atyp = np.sum((fold_preds == 0) & (true_labels == 1))

    # Guard the ratios so a class with no samples yields 0.0 instead of NaN.
    sens_atypical = tp_atyp / (tp_atyp + fn_atyp) if (tp_atyp + fn_atyp) > 0 else 0.0
    spec_atypical = tn_atyp / (tn_atyp + fp_atyp) if (tn_atyp + fp_atyp) > 0 else 0.0

    # Normal (class 1) as positive
    # With two classes these mirror the atypical figures
    # (sensitivity and specificity swap roles).
    tp_norm = np.sum((fold_preds == 1) & (true_labels == 1))
    fn_norm = np.sum((fold_preds == 0) & (true_labels == 1))
    tn_norm = np.sum((fold_preds == 0) & (true_labels == 0))
    fp_norm = np.sum((fold_preds == 1) & (true_labels == 0))

    sens_normal = tp_norm / (tp_norm + fn_norm) if (tp_norm + fn_norm) > 0 else 0.0
    spec_normal = tn_norm / (tn_norm + fp_norm) if (tn_norm + fp_norm) > 0 else 0.0

    logger.info(
        f"Fold {fold} - "
        f"Atypical (0): Sensitivity={sens_atypical:.4f}, Specificity={spec_atypical:.4f} | "
        f"Normal (1): Sensitivity={sens_normal:.4f}, Specificity={spec_normal:.4f}"
    )

    fold_sens_atypical.append(sens_atypical)
    fold_spec_atypical.append(spec_atypical)
    fold_sens_normal.append(sens_normal)
    fold_spec_normal.append(spec_normal)

    fold_probs_dict[f"fold_{fold}"] = {
        "probs": fold_probs.tolist(),
        "preds": fold_preds.tolist(),
        "true_labels": true_labels.tolist(),
        # Stored so each prediction can be traced back to its image file.
        "image_paths": list(image_paths),
        "sens_atypical": sens_atypical,
        "spec_atypical": spec_atypical,
        "sens_normal": sens_normal,
        "spec_normal": spec_normal,
    }

    # Drop the fold's model before loading the next one.
    del model
    gc.collect()
    torch.cuda.empty_cache()

# Summary
# Mean and standard deviation (np.std default, ddof=0) across the folds,
# reported once per class.
logger.info("\n--- Final Summary (H-Optimus-1 Linear Probing, threshold=0.5) ---")
for class_label, sens_values, spec_values in (
    ("Atypical (class 0)", fold_sens_atypical, fold_spec_atypical),
    ("Normal (class 1)", fold_sens_normal, fold_spec_normal),
):
    sens_mean, sens_std = np.mean(sens_values), np.std(sens_values)
    spec_mean, spec_std = np.mean(spec_values), np.std(spec_values)
    # Pad the label to the width of the longer one so the columns line up.
    logger.info(
        f"{class_label:<18} - Sensitivity: {sens_mean:.4f} ± {sens_std:.4f}, "
        f"Specificity: {spec_mean:.4f} ± {spec_std:.4f}"
    )

# Persist the per-fold probabilities, predictions, labels and metrics.
with open("hoptimus1_amibr_test_predictions_sens_spec.pkl", "wb") as f:
    pickle.dump(fold_probs_dict, f)

logger.info("Saved prediction results to hoptimus1_amibr_test_predictions_sens_spec.pkl")


2025-12-12 15:15:17,479 - INFO - Loading pretrained weights from Hugging Face hub (bioptimus/H-optimus-1)
2025-12-12 15:15:17,634 - INFO - [bioptimus/H-optimus-1] Safe alternative available for 'pytorch_model.bin' (as 'model.safetensors'). Loading weights using safetensors.
2025-12-12 15:15:20,175 - INFO - --- Fold 1 Inference ---
  model = torch.load(
Fold 1: 100%|██████████| 826/826 [00:46<00:00, 17.89it/s]
2025-12-12 15:16:06,353 - INFO - Fold 1 - Atypical (0): Sensitivity=0.4674, Specificity=0.6698 | Normal (1): Sensitivity=0.6698, Specificity=0.4674
2025-12-12 15:16:06,544 - INFO - --- Fold 2 Inference ---
Fold 2: 100%|██████████| 826/826 [00:46<00:00, 17.82it/s]
2025-12-12 15:16:52,900 - INFO - Fold 2 - Atypical (0): Sensitivity=0.5000, Specificity=0.6994 | Normal (1): Sensitivity=0.6994, Specificity=0.5000
2025-12-12 15:16:53,073 - INFO - --- Fold 3 Inference ---
Fold 3: 100%|██████████| 826/826 [00:45<00:00, 17.98it/s]
2025-12-12 15:17:39,016 - INFO - Fold 3 - Atypical (0): Sen

## AtNorM-Br

In [None]:
import os
import gc
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from tqdm import tqdm
import numpy as np
import pickle
import logging
from sklearn.metrics import (
    balanced_accuracy_score,
    roc_auc_score,
    precision_recall_curve,
    average_precision_score
)
import matplotlib.pyplot as plt
from huggingface_hub import login
import timm
from torchvision import transforms
from scipy.interpolate import interp1d

# Logging setup
log_file = "hoptimus1_linear_probe_inference.log"
# force=True: basicConfig() does nothing when the root logger already has
# handlers (e.g. from a previously executed cell), so without it this cell's
# log file would never be opened in a shared kernel session.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[logging.FileHandler(log_file), logging.StreamHandler()],
    force=True,
)
logger = logging.getLogger(__name__)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hugging Face login and model load
# Read the token from the HF_TOKEN environment variable so a real credential
# does not have to be pasted into the notebook; the placeholder is kept as the
# fallback for backward compatibility.
login(token=os.environ.get("HF_TOKEN", "hf_xxx"))  # Replace with your actual token
hoptimus_model = timm.create_model(
    "hf-hub:bioptimus/H-optimus-1",
    pretrained=True,
    init_values=1e-5,
    dynamic_img_size=False
).to(device).eval()  # evaluation mode: the backbone is only used for inference

# Transform
# NOTE(review): mean/std are presumably the statistics published with the
# H-optimus model - confirm against the model card.
hoptimus_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(
        mean=(0.707223, 0.578729, 0.703617),
        std=(0.211883, 0.230117, 0.177517)
    )
])

# Feature extractor
def extract_embedding(img_path):
    """Return the backbone feature vector for a single image file.

    The image is converted to RGB, resized to 224x224, passed through
    ``hoptimus_transform`` and then through ``hoptimus_model`` without
    gradient tracking.

    Args:
        img_path: Path of an image file that PIL can open.

    Returns:
        A float32 CPU tensor with the leading batch dimension removed.
    """
    # The context manager closes the underlying file deterministically.
    with Image.open(img_path) as img:
        image = img.convert("RGB").resize((224, 224))
    tensor = hoptimus_transform(image).unsqueeze(0).to(device)
    # Only enable CUDA autocast when actually running on CUDA; requesting it
    # on a CPU-only machine makes PyTorch warn and disable it on every call.
    use_amp = device.type == "cuda"
    with torch.no_grad(), torch.autocast(
        device_type="cuda", dtype=torch.float16, enabled=use_amp
    ):
        features = hoptimus_model(tensor)
    # Cast explicitly so the linear probe never receives half precision.
    return features.squeeze(0).float().cpu()

# Dataset
class InferenceDataset(Dataset):
    """Map-style dataset yielding ``(embedding, label)`` pairs for image files.

    Embeddings are computed on first access and memoised per index, so
    iterating the dataset once per fold does not re-run the feature extractor
    for images that were already embedded.

    Args:
        image_paths: Sequence of image file paths.
        labels: Sequence of labels aligned with ``image_paths``.
        embed_fn: Optional callable mapping a path to its embedding. When
            omitted, the module-level ``extract_embedding`` is used.
    """

    def __init__(self, image_paths, labels, embed_fn=None):
        self.image_paths = image_paths
        self.labels = labels
        self.embed_fn = embed_fn
        # index -> embedding. The cache lives in whichever process calls
        # __getitem__; DataLoader worker processes would each keep their own.
        self._cache = {}

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        if idx not in self._cache:
            # Resolve the default at call time so the class can be defined
            # before extract_embedding is.
            embed = self.embed_fn if self.embed_fn is not None else extract_embedding
            self._cache[idx] = embed(self.image_paths[idx])
        return self._cache[idx], self.labels[idx]

# Classifier head (pure linear probe: 1 layer)
class HoptimusBinaryClassifier(nn.Module):
    """Linear probe: a single linear layer producing one logit per input.

    NOTE(review): the fold checkpoints are loaded as whole modules and are
    presumably instances of this class, so the class name and the
    ``classifier`` attribute name should stay unchanged.

    Args:
        in_features: Size of the input embedding. Defaults to 1536, the value
            that used to be hard-coded.
    """

    def __init__(self, in_features=1536):
        super().__init__()
        self.classifier = nn.Linear(in_features, 1)

    def forward(self, x):
        """Return the raw logit(s); no sigmoid is applied here."""
        return self.classifier(x)

# Load test images
# Each class lives in its own sub-directory of test_root; the directory name
# determines the integer label.
test_root = "/data/MELBA-AmiBr/Datasets_Stratified/AtNorM-Br"
class_map = {"Atypical": 0, "Normal": 1}
image_paths, labels = [], []

for label_name, label_val in class_map.items():
    class_dir = os.path.join(test_root, label_name)
    # os.listdir() returns entries in arbitrary order; sort them so the
    # prediction order is reproducible across runs and machines.
    for fname in sorted(os.listdir(class_dir)):
        if fname.lower().endswith(('.jpg', '.jpeg', '.png', '.tif')):
            image_paths.append(os.path.join(class_dir, fname))
            labels.append(label_val)

# Fail early with a clear message instead of deep inside the metric code.
if not image_paths:
    raise RuntimeError(f"No test images found under {test_root}")

true_labels = np.array(labels)
test_dataset = InferenceDataset(image_paths, labels)
# shuffle=False keeps predictions aligned with true_labels; one image per
# batch because the evaluation loop reduces each batch to a single scalar.
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=0)

# Output setup
os.makedirs("pr_curves", exist_ok=True)
fold_probs_dict = {}
fold_bal_accs, fold_aurocs, fold_pr_aucs = [], [], []
all_precisions, all_recalls = [], []

# Evaluate each fold
for fold in range(1, 6):
    logger.info(f"--- Fold {fold} Inference ---")

    # The checkpoint is used directly as a module, i.e. it is a whole pickled
    # model rather than a state dict. Newer PyTorch releases default
    # torch.load to weights_only=True, which rejects such files, so opt out
    # explicitly. Only load checkpoint files from a trusted source.
    model = torch.load(
        f"hoptimus1_linear_probe_fold_{fold}_best.pth",
        map_location=device,
        weights_only=False,
    )
    model.to(device).eval()

    fold_probs = []
    with torch.no_grad():
        for embeddings, _ in tqdm(test_loader, desc=f"Fold {fold}"):
            embeddings = embeddings.to(device)
            logits = model(embeddings)
            # One image per batch, so the sigmoid output reduces to a scalar.
            prob = torch.sigmoid(logits).squeeze().cpu().item()
            fold_probs.append(prob)

    fold_probs = np.array(fold_probs)
    # The score is treated as the probability of label 1; threshold at 0.5.
    fold_preds = (fold_probs > 0.5).astype(int)

    # The ranking metrics below use scikit-learn's default positive label,
    # which is 1 for 0/1 labels ("Normal" in class_map).
    bal_acc = balanced_accuracy_score(true_labels, fold_preds)
    auroc = roc_auc_score(true_labels, fold_probs)
    precision, recall, _ = precision_recall_curve(true_labels, fold_probs)
    pr_auc = average_precision_score(true_labels, fold_probs)

    logger.info(f"Fold {fold} - Balanced Accuracy: {bal_acc:.4f}, AUROC: {auroc:.4f}, PR AUC: {pr_auc:.4f}")
    fold_bal_accs.append(bal_acc)
    fold_aurocs.append(auroc)
    fold_pr_aucs.append(pr_auc)
    all_precisions.append(precision)
    all_recalls.append(recall)

    fold_probs_dict[f"fold_{fold}"] = {
        "probs": fold_probs.tolist(),
        "preds": fold_preds.tolist(),
        "true_labels": true_labels.tolist(),
        # Stored so each prediction can be traced back to its image file.
        "image_paths": list(image_paths),
    }

    # PR Curve
    plt.figure()
    plt.plot(recall, precision, label=f"Fold {fold} (AP = {pr_auc:.4f})")
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.title(f"PR Curve - Fold {fold}")
    plt.grid(True)
    plt.legend()
    plt.savefig(f"pr_curves/hoptimus1_atnorm-br_pr_curve_fold_{fold}.png")
    plt.close()

    # Drop the fold's model before loading the next one.
    del model
    gc.collect()
    torch.cuda.empty_cache()

# Averaged PR Curve
# Resample every fold's curve onto one shared recall grid so the precisions
# can be averaged point-wise.
rec_uniform = np.linspace(0, 1, 1000)
interp_prec_list = [
    # Both arrays are reversed before being handed to interp1d.
    interp1d(rec[::-1], prec[::-1], bounds_error=False, fill_value=0.0)(rec_uniform)
    for prec, rec in zip(all_precisions, all_recalls)
]
mean_precision = np.mean(interp_prec_list, axis=0)

fig, ax = plt.subplots()
ax.plot(rec_uniform, mean_precision, label=f"Mean PR (Avg AUC = {np.mean(fold_pr_aucs):.4f})")
ax.set_xlabel("Recall")
ax.set_ylabel("Precision")
ax.set_title("Average PR Curve - H-Optimus-1 Linear Probing")
ax.grid(True)
ax.legend()
fig.savefig("pr_curves/hoptimus1_atnorm-br_pr_curve_average.png")
plt.close(fig)

# Summary
# Mean and standard deviation (np.std default, ddof=0) across the folds.
logger.info("\n--- Final Summary (H-Optimus-1 Linear Probing) ---")
for metric_name, fold_values in (
    ("Balanced Accuracy", fold_bal_accs),
    ("AUROC", fold_aurocs),
    ("PR AUC", fold_pr_aucs),
):
    logger.info(f"{metric_name}: {np.mean(fold_values):.4f} ± {np.std(fold_values):.4f}")

# Persist the per-fold probabilities, predictions and labels.
with open("hoptimus1_atnorm-br_test_predictions.pkl", "wb") as f:
    pickle.dump(fold_probs_dict, f)

logger.info("Saved prediction results to hoptimus1_atnorm-br_test_predictions.pkl")


2025-12-11 19:59:27,427 - INFO - Loading pretrained weights from Hugging Face hub (bioptimus/H-optimus-1)
2025-12-11 19:59:27,571 - INFO - [bioptimus/H-optimus-1] Safe alternative available for 'pytorch_model.bin' (as 'model.safetensors'). Loading weights using safetensors.
2025-12-11 19:59:29,485 - INFO - --- Fold 1 Inference ---
  model = torch.load(f"hoptimus1_linear_probe_fold_{fold}_best.pth", map_location=device)
Fold 1: 100%|██████████| 746/746 [00:29<00:00, 25.63it/s]
2025-12-11 19:59:58,602 - INFO - Fold 1 - Balanced Accuracy: 0.5993, AUROC: 0.6289, PR AUC: 0.8858
2025-12-11 19:59:58,851 - INFO - --- Fold 2 Inference ---
  model = torch.load(f"hoptimus1_linear_probe_fold_{fold}_best.pth", map_location=device)
Fold 2: 100%|██████████| 746/746 [00:28<00:00, 25.86it/s]
2025-12-11 20:00:27,712 - INFO - Fold 2 - Balanced Accuracy: 0.6397, AUROC: 0.7124, PR AUC: 0.9194
2025-12-11 20:00:27,959 - INFO - --- Fold 3 Inference ---
  model = torch.load(f"hoptimus1_linear_probe_fold_{fold}

In [None]:
import os
import gc
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from tqdm import tqdm
import numpy as np
import pickle
import logging
from huggingface_hub import login
import timm
from torchvision import transforms

# Logging setup
log_file = "hoptimus1_linear_probe_sens_spec.log"
# force=True: basicConfig() does nothing when the root logger already has
# handlers (e.g. from a previously executed cell), so without it this cell's
# log file would never be opened in a shared kernel session.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[logging.FileHandler(log_file), logging.StreamHandler()],
    force=True,
)
logger = logging.getLogger(__name__)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hugging Face login and model load
# Read the token from the HF_TOKEN environment variable so a real credential
# does not have to be pasted into the notebook; the placeholder is kept as the
# fallback for backward compatibility.
login(token=os.environ.get("HF_TOKEN", "hf_xxx"))  # Replace with your actual token
hoptimus_model = timm.create_model(
    "hf-hub:bioptimus/H-optimus-1",
    pretrained=True,
    init_values=1e-5,
    dynamic_img_size=False
).to(device).eval()  # evaluation mode: the backbone is only used for inference

# Transform
# NOTE(review): mean/std are presumably the statistics published with the
# H-optimus model - confirm against the model card.
hoptimus_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(
        mean=(0.707223, 0.578729, 0.703617),
        std=(0.211883, 0.230117, 0.177517)
    )
])

# Feature extractor
def extract_embedding(img_path):
    """Return the backbone feature vector for a single image file.

    The image is converted to RGB, resized to 224x224, passed through
    ``hoptimus_transform`` and then through ``hoptimus_model`` without
    gradient tracking.

    Args:
        img_path: Path of an image file that PIL can open.

    Returns:
        A float32 CPU tensor with the leading batch dimension removed.
    """
    # The context manager closes the underlying file deterministically.
    with Image.open(img_path) as img:
        image = img.convert("RGB").resize((224, 224))
    tensor = hoptimus_transform(image).unsqueeze(0).to(device)
    # Only enable CUDA autocast when actually running on CUDA; requesting it
    # on a CPU-only machine makes PyTorch warn and disable it on every call.
    use_amp = device.type == "cuda"
    with torch.no_grad(), torch.autocast(
        device_type="cuda", dtype=torch.float16, enabled=use_amp
    ):
        features = hoptimus_model(tensor)
    # Cast explicitly so the linear probe never receives half precision.
    return features.squeeze(0).float().cpu()

# Dataset
class InferenceDataset(Dataset):
    """Map-style dataset yielding ``(embedding, label)`` pairs for image files.

    Embeddings are computed on first access and memoised per index, so
    iterating the dataset once per fold does not re-run the feature extractor
    for images that were already embedded.

    Args:
        image_paths: Sequence of image file paths.
        labels: Sequence of labels aligned with ``image_paths``.
        embed_fn: Optional callable mapping a path to its embedding. When
            omitted, the module-level ``extract_embedding`` is used.
    """

    def __init__(self, image_paths, labels, embed_fn=None):
        self.image_paths = image_paths
        self.labels = labels
        self.embed_fn = embed_fn
        # index -> embedding. The cache lives in whichever process calls
        # __getitem__; DataLoader worker processes would each keep their own.
        self._cache = {}

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        if idx not in self._cache:
            # Resolve the default at call time so the class can be defined
            # before extract_embedding is.
            embed = self.embed_fn if self.embed_fn is not None else extract_embedding
            self._cache[idx] = embed(self.image_paths[idx])
        return self._cache[idx], self.labels[idx]

# Classifier head (pure linear probe: 1 layer)
class HoptimusBinaryClassifier(nn.Module):
    """Linear probe: a single linear layer producing one logit per input.

    NOTE(review): the fold checkpoints are loaded as whole modules and are
    presumably instances of this class, so the class name and the
    ``classifier`` attribute name should stay unchanged.

    Args:
        in_features: Size of the input embedding. Defaults to 1536, the value
            that used to be hard-coded.
    """

    def __init__(self, in_features=1536):
        super().__init__()
        self.classifier = nn.Linear(in_features, 1)

    def forward(self, x):
        """Return the raw logit(s); no sigmoid is applied here."""
        return self.classifier(x)

# Load test images
# Each class lives in its own sub-directory of test_root; the directory name
# determines the integer label.
test_root = "/data/MELBA-AmiBr/Datasets_Stratified/AtNorM-Br"
class_map = {"Atypical": 0, "Normal": 1}
image_paths, labels = [], []

for label_name, label_val in class_map.items():
    class_dir = os.path.join(test_root, label_name)
    # os.listdir() returns entries in arbitrary order; sort them so the
    # prediction order is reproducible across runs and machines.
    for fname in sorted(os.listdir(class_dir)):
        if fname.lower().endswith(('.jpg', '.jpeg', '.png', '.tif')):
            image_paths.append(os.path.join(class_dir, fname))
            labels.append(label_val)

# Fail early with a clear message instead of deep inside the metric code.
if not image_paths:
    raise RuntimeError(f"No test images found under {test_root}")

true_labels = np.array(labels)
test_dataset = InferenceDataset(image_paths, labels)
# shuffle=False keeps predictions aligned with true_labels; one image per
# batch because the evaluation loop reduces each batch to a single scalar.
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=0)

# Output setup
fold_probs_dict = {}

# Per-class sensitivity/specificity across folds
fold_sens_atypical = []
fold_spec_atypical = []
fold_sens_normal = []
fold_spec_normal = []

# Evaluate each fold
for fold in range(1, 6):
    logger.info(f"--- Fold {fold} Inference ---")

    # The checkpoint is used directly as a module, i.e. it is a whole pickled
    # model rather than a state dict. Newer PyTorch releases default
    # torch.load to weights_only=True, which rejects such files, so opt out
    # explicitly. Only load checkpoint files from a trusted source.
    model = torch.load(
        f"hoptimus1_linear_probe_fold_{fold}_best.pth",
        map_location=device,
        weights_only=False,
    )
    model.to(device).eval()

    fold_probs = []
    with torch.no_grad():
        for embeddings, _ in tqdm(test_loader, desc=f"Fold {fold}"):
            embeddings = embeddings.to(device)
            logits = model(embeddings)
            # One image per batch, so the sigmoid output reduces to a scalar.
            prob = torch.sigmoid(logits).squeeze().cpu().item()
            fold_probs.append(prob)

    fold_probs = np.array(fold_probs)
    # The score is treated as the probability of label 1; threshold at 0.5.
    fold_preds = (fold_probs > 0.5).astype(int)

    # Atypical (class 0) as positive
    tp_atyp = np.sum((fold_preds == 0) & (true_labels == 0))
    fn_atyp = np.sum((fold_preds == 1) & (true_labels == 0))
    tn_atyp = np.sum((fold_preds == 1) & (true_labels == 1))
    fp_atyp = np.sum((fold_preds == 0) & (true_labels == 1))

    # Guard the ratios so a class with no samples yields 0.0 instead of NaN.
    sens_atypical = tp_atyp / (tp_atyp + fn_atyp) if (tp_atyp + fn_atyp) > 0 else 0.0
    spec_atypical = tn_atyp / (tn_atyp + fp_atyp) if (tn_atyp + fp_atyp) > 0 else 0.0

    # Normal (class 1) as positive
    # With two classes these mirror the atypical figures
    # (sensitivity and specificity swap roles).
    tp_norm = np.sum((fold_preds == 1) & (true_labels == 1))
    fn_norm = np.sum((fold_preds == 0) & (true_labels == 1))
    tn_norm = np.sum((fold_preds == 0) & (true_labels == 0))
    fp_norm = np.sum((fold_preds == 1) & (true_labels == 0))

    sens_normal = tp_norm / (tp_norm + fn_norm) if (tp_norm + fn_norm) > 0 else 0.0
    spec_normal = tn_norm / (tn_norm + fp_norm) if (tn_norm + fp_norm) > 0 else 0.0

    logger.info(
        f"Fold {fold} - "
        f"Atypical (0): Sensitivity={sens_atypical:.4f}, Specificity={spec_atypical:.4f} | "
        f"Normal (1): Sensitivity={sens_normal:.4f}, Specificity={spec_normal:.4f}"
    )

    fold_sens_atypical.append(sens_atypical)
    fold_spec_atypical.append(spec_atypical)
    fold_sens_normal.append(sens_normal)
    fold_spec_normal.append(spec_normal)

    fold_probs_dict[f"fold_{fold}"] = {
        "probs": fold_probs.tolist(),
        "preds": fold_preds.tolist(),
        "true_labels": true_labels.tolist(),
        # Stored so each prediction can be traced back to its image file.
        "image_paths": list(image_paths),
        "sens_atypical": sens_atypical,
        "spec_atypical": spec_atypical,
        "sens_normal": sens_normal,
        "spec_normal": spec_normal,
    }

    # Drop the fold's model before loading the next one.
    del model
    gc.collect()
    torch.cuda.empty_cache()

# Summary
# Mean and standard deviation (np.std default, ddof=0) across the folds,
# reported once per class.
logger.info("\n--- Final Summary (H-Optimus-1 Linear Probing, threshold=0.5) ---")
for class_label, sens_values, spec_values in (
    ("Atypical (class 0)", fold_sens_atypical, fold_spec_atypical),
    ("Normal (class 1)", fold_sens_normal, fold_spec_normal),
):
    sens_mean, sens_std = np.mean(sens_values), np.std(sens_values)
    spec_mean, spec_std = np.mean(spec_values), np.std(spec_values)
    # Pad the label to the width of the longer one so the columns line up.
    logger.info(
        f"{class_label:<18} - Sensitivity: {sens_mean:.4f} ± {sens_std:.4f}, "
        f"Specificity: {spec_mean:.4f} ± {spec_std:.4f}"
    )

# Persist the per-fold probabilities, predictions, labels and metrics.
with open("hoptimus1_atnorm-br_test_predictions_sens_spec.pkl", "wb") as f:
    pickle.dump(fold_probs_dict, f)

logger.info("Saved prediction results to hoptimus1_atnorm-br_test_predictions_sens_spec.pkl")


2025-12-12 15:21:38,286 - INFO - Loading pretrained weights from Hugging Face hub (bioptimus/H-optimus-1)
2025-12-12 15:21:38,424 - INFO - [bioptimus/H-optimus-1] Safe alternative available for 'pytorch_model.bin' (as 'model.safetensors'). Loading weights using safetensors.
2025-12-12 15:21:40,185 - INFO - --- Fold 1 Inference ---
  model = torch.load(
Fold 1: 100%|██████████| 746/746 [00:29<00:00, 25.32it/s]
2025-12-12 15:22:09,659 - INFO - Fold 1 - Atypical (0): Sensitivity=0.6016, Specificity=0.5971 | Normal (1): Sensitivity=0.5971, Specificity=0.6016
2025-12-12 15:22:09,890 - INFO - --- Fold 2 Inference ---
Fold 2: 100%|██████████| 746/746 [00:29<00:00, 25.56it/s]
2025-12-12 15:22:39,087 - INFO - Fold 2 - Atypical (0): Sensitivity=0.5156, Specificity=0.7638 | Normal (1): Sensitivity=0.7638, Specificity=0.5156
2025-12-12 15:22:39,267 - INFO - --- Fold 3 Inference ---
Fold 3: 100%|██████████| 746/746 [00:29<00:00, 25.54it/s]
2025-12-12 15:23:08,479 - INFO - Fold 3 - Atypical (0): Sen

## AtNorM-MD

In [None]:
import os
import gc
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from tqdm import tqdm
import numpy as np
import pickle
import logging
from sklearn.metrics import (
    balanced_accuracy_score,
    roc_auc_score,
    precision_recall_curve,
    average_precision_score
)
import matplotlib.pyplot as plt
from huggingface_hub import login
import timm
from torchvision import transforms
from scipy.interpolate import interp1d

# Logging setup
log_file = "hoptimus1_linear_probe_inference.log"
# force=True: basicConfig() does nothing when the root logger already has
# handlers (e.g. from a previously executed cell), so without it this cell's
# log file would never be opened in a shared kernel session.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[logging.FileHandler(log_file), logging.StreamHandler()],
    force=True,
)
logger = logging.getLogger(__name__)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hugging Face login and model load
# Read the token from the HF_TOKEN environment variable so a real credential
# does not have to be pasted into the notebook; the placeholder is kept as the
# fallback for backward compatibility.
login(token=os.environ.get("HF_TOKEN", "hf_xxx"))  # Replace with your actual token
hoptimus_model = timm.create_model(
    "hf-hub:bioptimus/H-optimus-1",
    pretrained=True,
    init_values=1e-5,
    dynamic_img_size=False
).to(device).eval()  # evaluation mode: the backbone is only used for inference

# Transform
# NOTE(review): mean/std are presumably the statistics published with the
# H-optimus model - confirm against the model card.
hoptimus_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(
        mean=(0.707223, 0.578729, 0.703617),
        std=(0.211883, 0.230117, 0.177517)
    )
])

# Feature extractor
def extract_embedding(img_path):
    """Return the backbone feature vector for a single image file.

    The image is converted to RGB, resized to 224x224, passed through
    ``hoptimus_transform`` and then through ``hoptimus_model`` without
    gradient tracking.

    Args:
        img_path: Path of an image file that PIL can open.

    Returns:
        A float32 CPU tensor with the leading batch dimension removed.
    """
    # The context manager closes the underlying file deterministically.
    with Image.open(img_path) as img:
        image = img.convert("RGB").resize((224, 224))
    tensor = hoptimus_transform(image).unsqueeze(0).to(device)
    # Only enable CUDA autocast when actually running on CUDA; requesting it
    # on a CPU-only machine makes PyTorch warn and disable it on every call.
    use_amp = device.type == "cuda"
    with torch.no_grad(), torch.autocast(
        device_type="cuda", dtype=torch.float16, enabled=use_amp
    ):
        features = hoptimus_model(tensor)
    # Cast explicitly so the linear probe never receives half precision.
    return features.squeeze(0).float().cpu()

# Dataset
class InferenceDataset(Dataset):
    """Map-style dataset yielding ``(embedding, label)`` pairs for image files.

    Embeddings are computed on first access and memoised per index, so
    iterating the dataset once per fold does not re-run the feature extractor
    for images that were already embedded.

    Args:
        image_paths: Sequence of image file paths.
        labels: Sequence of labels aligned with ``image_paths``.
        embed_fn: Optional callable mapping a path to its embedding. When
            omitted, the module-level ``extract_embedding`` is used.
    """

    def __init__(self, image_paths, labels, embed_fn=None):
        self.image_paths = image_paths
        self.labels = labels
        self.embed_fn = embed_fn
        # index -> embedding. The cache lives in whichever process calls
        # __getitem__; DataLoader worker processes would each keep their own.
        self._cache = {}

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        if idx not in self._cache:
            # Resolve the default at call time so the class can be defined
            # before extract_embedding is.
            embed = self.embed_fn if self.embed_fn is not None else extract_embedding
            self._cache[idx] = embed(self.image_paths[idx])
        return self._cache[idx], self.labels[idx]

# Classifier head (pure linear probe: 1 layer)
class HoptimusBinaryClassifier(nn.Module):
    """Linear probe: a single linear layer producing one logit per input.

    NOTE(review): the fold checkpoints are loaded as whole modules and are
    presumably instances of this class, so the class name and the
    ``classifier`` attribute name should stay unchanged.

    Args:
        in_features: Size of the input embedding. Defaults to 1536, the value
            that used to be hard-coded.
    """

    def __init__(self, in_features=1536):
        super().__init__()
        self.classifier = nn.Linear(in_features, 1)

    def forward(self, x):
        """Return the raw logit(s); no sigmoid is applied here."""
        return self.classifier(x)

# Load test images
# Each class lives in its own sub-directory of test_root; the directory name
# determines the integer label.
test_root = "/data/MELBA-AmiBr/Datasets_Stratified/AtNorM-MD"
class_map = {"Atypical": 0, "Normal": 1}
image_paths, labels = [], []

for label_name, label_val in class_map.items():
    class_dir = os.path.join(test_root, label_name)
    # os.listdir() returns entries in arbitrary order; sort them so the
    # prediction order is reproducible across runs and machines.
    for fname in sorted(os.listdir(class_dir)):
        if fname.lower().endswith(('.jpg', '.jpeg', '.png', '.tif')):
            image_paths.append(os.path.join(class_dir, fname))
            labels.append(label_val)

# Fail early with a clear message instead of deep inside the metric code.
if not image_paths:
    raise RuntimeError(f"No test images found under {test_root}")

true_labels = np.array(labels)
test_dataset = InferenceDataset(image_paths, labels)
# shuffle=False keeps predictions aligned with true_labels; one image per
# batch because the evaluation loop reduces each batch to a single scalar.
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=0)

# Output setup
os.makedirs("pr_curves", exist_ok=True)
fold_probs_dict = {}
fold_bal_accs, fold_aurocs, fold_pr_aucs = [], [], []
all_precisions, all_recalls = [], []

# Evaluate each fold
for fold in range(1, 6):
    logger.info(f"--- Fold {fold} Inference ---")

    # The checkpoint is used directly as a module, i.e. it is a whole pickled
    # model rather than a state dict. Newer PyTorch releases default
    # torch.load to weights_only=True, which rejects such files, so opt out
    # explicitly. Only load checkpoint files from a trusted source.
    model = torch.load(
        f"hoptimus1_linear_probe_fold_{fold}_best.pth",
        map_location=device,
        weights_only=False,
    )
    model.to(device).eval()

    fold_probs = []
    with torch.no_grad():
        for embeddings, _ in tqdm(test_loader, desc=f"Fold {fold}"):
            embeddings = embeddings.to(device)
            logits = model(embeddings)
            # One image per batch, so the sigmoid output reduces to a scalar.
            prob = torch.sigmoid(logits).squeeze().cpu().item()
            fold_probs.append(prob)

    fold_probs = np.array(fold_probs)
    # The score is treated as the probability of label 1; threshold at 0.5.
    fold_preds = (fold_probs > 0.5).astype(int)

    # The ranking metrics below use scikit-learn's default positive label,
    # which is 1 for 0/1 labels ("Normal" in class_map).
    bal_acc = balanced_accuracy_score(true_labels, fold_preds)
    auroc = roc_auc_score(true_labels, fold_probs)
    precision, recall, _ = precision_recall_curve(true_labels, fold_probs)
    pr_auc = average_precision_score(true_labels, fold_probs)

    logger.info(f"Fold {fold} - Balanced Accuracy: {bal_acc:.4f}, AUROC: {auroc:.4f}, PR AUC: {pr_auc:.4f}")
    fold_bal_accs.append(bal_acc)
    fold_aurocs.append(auroc)
    fold_pr_aucs.append(pr_auc)
    all_precisions.append(precision)
    all_recalls.append(recall)

    fold_probs_dict[f"fold_{fold}"] = {
        "probs": fold_probs.tolist(),
        "preds": fold_preds.tolist(),
        "true_labels": true_labels.tolist(),
        # Stored so each prediction can be traced back to its image file.
        "image_paths": list(image_paths),
    }

    # PR Curve
    plt.figure()
    plt.plot(recall, precision, label=f"Fold {fold} (AP = {pr_auc:.4f})")
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.title(f"PR Curve - Fold {fold}")
    plt.grid(True)
    plt.legend()
    plt.savefig(f"pr_curves/hoptimus1_atnorm-md_pr_curve_fold_{fold}.png")
    plt.close()

    # Drop the fold's model before loading the next one.
    del model
    gc.collect()
    torch.cuda.empty_cache()

# Averaged PR Curve
# Resample every fold's curve onto one shared recall grid so the precisions
# can be averaged point-wise.
rec_uniform = np.linspace(0, 1, 1000)
interp_prec_list = [
    # Both arrays are reversed before being handed to interp1d.
    interp1d(rec[::-1], prec[::-1], bounds_error=False, fill_value=0.0)(rec_uniform)
    for prec, rec in zip(all_precisions, all_recalls)
]
mean_precision = np.mean(interp_prec_list, axis=0)

fig, ax = plt.subplots()
ax.plot(rec_uniform, mean_precision, label=f"Mean PR (Avg AUC = {np.mean(fold_pr_aucs):.4f})")
ax.set_xlabel("Recall")
ax.set_ylabel("Precision")
ax.set_title("Average PR Curve - H-Optimus-1 Linear Probing")
ax.grid(True)
ax.legend()
fig.savefig("pr_curves/hoptimus1_atnorm-md_pr_curve_average.png")
plt.close(fig)

# Summary
# Mean and standard deviation (np.std default, ddof=0) across the folds.
logger.info("\n--- Final Summary (H-Optimus-1 Linear Probing) ---")
for metric_name, fold_values in (
    ("Balanced Accuracy", fold_bal_accs),
    ("AUROC", fold_aurocs),
    ("PR AUC", fold_pr_aucs),
):
    logger.info(f"{metric_name}: {np.mean(fold_values):.4f} ± {np.std(fold_values):.4f}")

# Persist the per-fold probabilities, predictions and labels.
with open("hoptimus1_atnorm-md_test_predictions.pkl", "wb") as f:
    pickle.dump(fold_probs_dict, f)

logger.info("Saved prediction results to hoptimus1_atnorm-md_test_predictions.pkl")


2025-12-11 20:02:40,177 - INFO - Loading pretrained weights from Hugging Face hub (bioptimus/H-optimus-1)
2025-12-11 20:02:40,308 - INFO - [bioptimus/H-optimus-1] Safe alternative available for 'pytorch_model.bin' (as 'model.safetensors'). Loading weights using safetensors.
2025-12-11 20:02:42,223 - INFO - --- Fold 1 Inference ---
  model = torch.load(f"hoptimus1_linear_probe_fold_{fold}_best.pth", map_location=device)
Fold 1: 100%|██████████| 2107/2107 [01:21<00:00, 25.84it/s]
2025-12-11 20:04:03,758 - INFO - Fold 1 - Balanced Accuracy: 0.5481, AUROC: 0.5690, PR AUC: 0.9126
2025-12-11 20:04:04,025 - INFO - --- Fold 2 Inference ---
  model = torch.load(f"hoptimus1_linear_probe_fold_{fold}_best.pth", map_location=device)
Fold 2: 100%|██████████| 2107/2107 [01:21<00:00, 25.90it/s]
2025-12-11 20:05:25,374 - INFO - Fold 2 - Balanced Accuracy: 0.5807, AUROC: 0.6525, PR AUC: 0.9369
2025-12-11 20:05:25,613 - INFO - --- Fold 3 Inference ---
  model = torch.load(f"hoptimus1_linear_probe_fold_{f

In [None]:
import os
import gc
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from tqdm import tqdm
import numpy as np
import pickle
import logging
from huggingface_hub import login
import timm
from torchvision import transforms

# Logging setup
# Messages go both to the log file below and to the console stream.
log_file = "hoptimus1_linear_probe_sens_spec.log"
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[logging.FileHandler(log_file), logging.StreamHandler()],
    # basicConfig() is a silent no-op when the root logger already has
    # handlers (e.g. an earlier cell in the same kernel configured logging).
    # force=True replaces them so this cell's log file is really used.
    force=True,
)
logger = logging.getLogger(__name__)

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hugging Face login and model load
# The access token is read from the HF_TOKEN environment variable so that no
# credential has to be written into the source. The original placeholder is
# kept as a fallback, so behavior is unchanged when the variable is not set.
login(token=os.environ.get("HF_TOKEN", "hf_xxx"))  # Replace with your actual token
# Load the pretrained H-optimus-1 backbone, move it to the selected device and
# switch it to evaluation mode.
hoptimus_model = timm.create_model(
    "hf-hub:bioptimus/H-optimus-1",
    pretrained=True,
    init_values=1e-5,
    dynamic_img_size=False
).to(device).eval()

# Transform
# Preprocessing applied to every image before it is fed to the backbone:
# conversion to a tensor followed by per-channel normalization with fixed
# mean / std values.
_to_tensor = transforms.ToTensor()
_normalize = transforms.Normalize(
    mean=(0.707223, 0.578729, 0.703617),
    std=(0.211883, 0.230117, 0.177517),
)
hoptimus_transform = transforms.Compose([_to_tensor, _normalize])

# Feature extractor
def extract_embedding(img_path):
    """Return the backbone embedding of a single image as a CPU tensor.

    The image is opened, converted to RGB, resized to 224x224, passed through
    ``hoptimus_transform`` and fed to ``hoptimus_model`` as a batch of one.

    Args:
        img_path: Path of the image file to embed.

    Returns:
        The model output with the batch dimension removed, moved to the CPU.
    """
    # Image.open() is lazy and keeps the file handle open; convert() loads the
    # pixel data into a new image, so the file can be closed right afterwards.
    with Image.open(img_path) as raw_image:
        image = raw_image.convert("RGB").resize((224, 224))
    tensor = hoptimus_transform(image).unsqueeze(0).to(device)
    # Mixed precision is only requested when actually running on CUDA; on a
    # CPU-only host the original call merely warned and disabled itself.
    use_amp = device.type == "cuda"
    with torch.no_grad(), torch.autocast(
        device_type="cuda", dtype=torch.float16, enabled=use_amp
    ):
        features = hoptimus_model(tensor)
    return features.squeeze(0).cpu()

# Dataset
class InferenceDataset(Dataset):
    """Dataset that yields ``(embedding, label)`` pairs for a list of images.

    Embeddings are computed on access by calling ``extract_embedding`` on the
    stored image path; nothing is precomputed or cached here.
    """

    def __init__(self, image_paths, labels):
        # Parallel sequences: labels[i] belongs to image_paths[i].
        self.image_paths = image_paths
        self.labels = labels

    def __len__(self):
        # One sample per stored image path.
        n_images = len(self.image_paths)
        return n_images

    def __getitem__(self, idx):
        path = self.image_paths[idx]
        features = extract_embedding(path)
        target = self.labels[idx]
        return features, target

# Classifier head (pure linear probe: 1 layer)
class HoptimusBinaryClassifier(nn.Module):
    """Linear probe: one fully connected layer from 1536 features to 1 logit.

    NOTE(review): this cell never instantiates the class; presumably it must
    be defined so the pickled fold checkpoints loaded further down can resolve
    it — confirm against the training notebook.
    """

    def __init__(self):
        super().__init__()
        linear_head = nn.Linear(1536, 1)
        self.classifier = linear_head

    def forward(self, x):
        # Raw logit; no activation is applied inside the module.
        logit = self.classifier(x)
        return logit

# Load test images
# Each class lives in its own sub-directory of test_root; class_map assigns
# the integer label used for every image found in that sub-directory.
test_root = "/data/MELBA-AmiBr/Datasets_Stratified/AtNorM-MD"
class_map = {"Atypical": 0, "Normal": 1}
image_paths, labels = [], []

for label_name, label_val in class_map.items():
    class_dir = os.path.join(test_root, label_name)
    # os.listdir() returns entries in arbitrary order. The saved predictions
    # can only be matched back to images by position, so sort the filenames
    # to make that order reproducible across runs and machines.
    for fname in sorted(os.listdir(class_dir)):
        # Keep only files with a recognised image extension (case-insensitive).
        if fname.lower().endswith(('.jpg', '.jpeg', '.png', '.tif')):
            image_paths.append(os.path.join(class_dir, fname))
            labels.append(label_val)

true_labels = np.array(labels)
test_dataset = InferenceDataset(image_paths, labels)
# One image per batch, in the fixed order built above.
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=0)

# Output setup
fold_probs_dict = {}

# Per-class sensitivity/specificity across folds
fold_sens_atypical = []
fold_spec_atypical = []
fold_sens_normal = []
fold_spec_normal = []


def _sensitivity_specificity(preds, labels, positive):
    """Return ``(sensitivity, specificity)`` treating ``positive`` as the positive class.

    Args:
        preds: Array of predicted class ids.
        labels: Array of true class ids, same length as ``preds``.
        positive: Class id counted as positive; every other id is negative.

    Returns:
        Tuple ``(sensitivity, specificity)``; a ratio whose denominator is
        zero is reported as 0.0.
    """
    pred_pos = preds == positive
    true_pos = labels == positive
    tp = np.sum(pred_pos & true_pos)
    fn = np.sum(~pred_pos & true_pos)
    tn = np.sum(~pred_pos & ~true_pos)
    fp = np.sum(pred_pos & ~true_pos)
    sensitivity = tp / (tp + fn) if (tp + fn) > 0 else 0.0
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0.0
    return sensitivity, specificity


# The backbone embeddings do not depend on the fold, so run the expensive
# feature extraction once and reuse the result for every fold's classifier.
cached_embeddings = [
    embeddings for embeddings, _ in tqdm(test_loader, desc="Extracting embeddings")
]

# Evaluate each fold
for fold in range(1, 6):
    logger.info(f"--- Fold {fold} Inference ---")

    # The checkpoint is a fully pickled model object, so weights_only must be
    # False; stating it explicitly keeps loading working on torch versions
    # whose default is True and silences the FutureWarning on older ones.
    # SECURITY: unpickling can execute arbitrary code — only load checkpoints
    # from a trusted source.
    model = torch.load(
        f"hoptimus1_linear_probe_fold_{fold}_best.pth",
        map_location=device,
        weights_only=False
    )
    model.to(device).eval()

    fold_probs = []
    with torch.no_grad():
        for embeddings in tqdm(cached_embeddings, desc=f"Fold {fold}"):
            logits = model(embeddings.to(device))
            # Flatten so this also works if the loader batch size is raised.
            fold_probs.extend(torch.sigmoid(logits).reshape(-1).cpu().tolist())

    fold_probs = np.array(fold_probs)
    # Probability above 0.5 -> class 1 (Normal), otherwise class 0 (Atypical).
    fold_preds = (fold_probs > 0.5).astype(int)

    # Atypical (class 0) as positive
    sens_atypical, spec_atypical = _sensitivity_specificity(fold_preds, true_labels, 0)

    # Normal (class 1) as positive
    sens_normal, spec_normal = _sensitivity_specificity(fold_preds, true_labels, 1)

    logger.info(
        f"Fold {fold} - "
        f"Atypical (0): Sensitivity={sens_atypical:.4f}, Specificity={spec_atypical:.4f} | "
        f"Normal (1): Sensitivity={sens_normal:.4f}, Specificity={spec_normal:.4f}"
    )

    fold_sens_atypical.append(sens_atypical)
    fold_spec_atypical.append(spec_atypical)
    fold_sens_normal.append(sens_normal)
    fold_spec_normal.append(spec_normal)

    fold_probs_dict[f"fold_{fold}"] = {
        "probs": fold_probs.tolist(),
        "preds": fold_preds.tolist(),
        "true_labels": true_labels.tolist(),
        "sens_atypical": sens_atypical,
        "spec_atypical": spec_atypical,
        "sens_normal": sens_normal,
        "spec_normal": spec_normal,
    }

    # Drop the fold's classifier and release cached GPU memory before the
    # next checkpoint is loaded.
    del model
    gc.collect()
    torch.cuda.empty_cache()

# Summary: mean ± standard deviation over folds, one line per class.
logger.info("\n--- Final Summary (H-Optimus-1 Linear Probing, threshold=0.5) ---")
for class_prefix, sens_values, spec_values in (
    ("Atypical (class 0) - Sensitivity: ", fold_sens_atypical, fold_spec_atypical),
    ("Normal (class 1)   - Sensitivity: ", fold_sens_normal, fold_spec_normal),
):
    sens_text = f"{np.mean(sens_values):.4f} ± {np.std(sens_values):.4f}, "
    spec_text = f"{np.mean(spec_values):.4f} ± {np.std(spec_values):.4f}"
    logger.info(class_prefix + sens_text + "Specificity: " + spec_text)

# Persist the per-fold probabilities, predictions, labels and metrics.
with open("hoptimus1_atnorm-md_test_predictions_sens_spec.pkl", "wb") as out_file:
    pickle.dump(fold_probs_dict, out_file)

logger.info("Saved prediction results to hoptimus1_atnorm-md_test_predictions_sens_spec.pkl")


2025-12-12 15:24:52,921 - INFO - Loading pretrained weights from Hugging Face hub (bioptimus/H-optimus-1)
2025-12-12 15:24:53,053 - INFO - [bioptimus/H-optimus-1] Safe alternative available for 'pytorch_model.bin' (as 'model.safetensors'). Loading weights using safetensors.
2025-12-12 15:24:54,783 - INFO - --- Fold 1 Inference ---
  model = torch.load(
Fold 1: 100%|██████████| 2107/2107 [01:22<00:00, 25.51it/s]
2025-12-12 15:26:17,383 - INFO - Fold 1 - Atypical (0): Sensitivity=0.3653, Specificity=0.7309 | Normal (1): Sensitivity=0.7309, Specificity=0.3653
2025-12-12 15:26:17,598 - INFO - --- Fold 2 Inference ---
Fold 2: 100%|██████████| 2107/2107 [01:22<00:00, 25.47it/s]
2025-12-12 15:27:40,339 - INFO - Fold 2 - Atypical (0): Sensitivity=0.3151, Specificity=0.8464 | Normal (1): Sensitivity=0.8464, Specificity=0.3151
2025-12-12 15:27:40,533 - INFO - --- Fold 3 Inference ---
Fold 3: 100%|██████████| 2107/2107 [01:22<00:00, 25.54it/s]
2025-12-12 15:29:03,047 - INFO - Fold 3 - Atypical (0