## AMi-Br Test Set

In [None]:
import os
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
from tqdm import tqdm
import numpy as np
import pandas as pd
from sklearn.metrics import (
    balanced_accuracy_score, roc_auc_score
)
from huggingface_hub import login
import timm
import pickle

# Device setup: use the GPU when CUDA is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hugging Face login
# The token is read from the HF_TOKEN environment variable so the secret does
# not have to be pasted into the notebook. The placeholder is kept as the
# fallback, so replacing it by hand still works exactly as before.
hf_token = os.environ.get("HF_TOKEN", "your_huggingface_token_here")
login(token=hf_token)

# Load pretrained Hoptimus-0 model and switch it to evaluation mode.
hoptimus_model = timm.create_model(
    "hf-hub:bioptimus/H-optimus-0",
    pretrained=True,
    init_values=1e-5,
    dynamic_img_size=False,
)
hoptimus_model.eval().to(device)

# Hoptimus-0 transform: PIL image -> tensor, then per-channel normalisation
# with the mean/std constants below.
hoptimus_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.707223, 0.578729, 0.703617), std=(0.211883, 0.230117, 0.177517))
])

# Embedding extractor
def extract_embedding(img_path):
    """Return the Hoptimus-0 feature vector for a single image file.

    The image is decoded as RGB, resized to 224x224, passed through
    ``hoptimus_transform`` and fed to ``hoptimus_model`` as a batch of one.

    Args:
        img_path: Path of the image file to embed.

    Returns:
        A float32 CPU tensor: the model output with the batch dimension removed.
    """
    # Close the file handle deterministically; a bare Image.open() can leave
    # it open (e.g. for multi-frame TIFFs) until garbage collection.
    with Image.open(img_path) as img:
        image = img.convert("RGB").resize((224, 224))
    tensor = hoptimus_transform(image).unsqueeze(0).to(device)
    # Mixed precision is only meaningful on the GPU; disable it explicitly on
    # the CPU fallback instead of relying on autocast's warn-and-disable path.
    use_amp = device.type == "cuda"
    with torch.inference_mode(), torch.autocast(
        device_type="cuda", dtype=torch.float16, enabled=use_amp
    ):
        features = hoptimus_model(tensor)
    # Pin the dtype to float32 so the embedding always matches the default
    # (float32) nn.Linear layers it is later fed to, whatever autocast returned.
    return features.squeeze(0).float().cpu()

# Dataset class
class InferenceDataset(Dataset):
    """Dataset of pre-computed embeddings paired with their labels.

    Every image is embedded once, eagerly, in the constructor; indexing
    afterwards only reads from the in-memory lists.
    """

    def __init__(self, image_paths, labels):
        """Embed each path in ``image_paths`` and keep ``labels`` alongside."""
        cached = []
        for path in tqdm(image_paths, desc="Extracting embeddings"):
            cached.append(extract_embedding(path))
        self.embeddings = cached
        self.labels = labels

    def __len__(self):
        """Number of cached embeddings."""
        return len(self.embeddings)

    def __getitem__(self, idx):
        """Return the ``(embedding, label)`` pair stored at position ``idx``."""
        embedding = self.embeddings[idx]
        label = self.labels[idx]
        return embedding, label

# Classifier definition
class HoptimusBinaryClassifier(nn.Module):
    """Small MLP head: 1536 inputs -> 512 hidden units -> one raw logit.

    No sigmoid is applied here; callers receive the unnormalised logit.
    """

    def __init__(self):
        super().__init__()
        # Layer order (and therefore the Sequential indices 0-3) is kept so
        # the state_dict keys remain "classifier.<index>.*".
        layers = [
            nn.Linear(in_features=1536, out_features=512),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=512, out_features=1),
        ]
        self.classifier = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the classifier stack and return the logits."""
        logits = self.classifier(x)
        return logits

# Load test dataset
# Each class lives in its own sub-folder of test_root; the folder name maps to
# the integer label in class_map.
test_root = "/data/MELBA-AmiBr/Datasets_Stratified/AMi-Br/Test"
class_map = {"Atypical": 0, "Normal": 1}
image_paths, labels = [], []

for class_name, label_val in class_map.items():
    class_folder = os.path.join(test_root, class_name)
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # sample order (and so the order of the saved predictions) reproducible.
    for fname in sorted(os.listdir(class_folder)):
        if fname.lower().endswith(('.jpg', '.jpeg', '.png', '.tif')):
            image_paths.append(os.path.join(class_folder, fname))
            labels.append(label_val)

# Fail early with a clear message instead of deep inside the metric code.
if not image_paths:
    raise FileNotFoundError(f"No test images found under {test_root}")

# Prepare dataset and loader (shuffle stays off so predictions line up with
# the order of `labels`).
test_dataset = InferenceDataset(image_paths, labels)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=8, pin_memory=True)

# Load models: one classifier head per cross-validation fold.
num_folds = 5
model_paths = [f"hoptimus0_linear_probe_fold_{i + 1}_best.pth" for i in range(num_folds)]
models = []

for path in model_paths:
    model = HoptimusBinaryClassifier().to(device)
    # weights_only=True restricts unpickling to tensors and plain containers,
    # which is all a state_dict needs; it avoids executing arbitrary pickled
    # code and silences the torch.load FutureWarning.
    state_dict = torch.load(path, map_location=device, weights_only=True)
    model.load_state_dict(state_dict)
    model.eval()
    models.append(model)

# Inference
# Every fold model scores the same test loader; per-fold metrics and raw
# outputs are collected for the summary and for the pickle written later.
true_labels = np.array(test_dataset.labels)
fold_bal_accs = []
fold_aurocs = []
fold_probs_dict = {}

for i, model in enumerate(models):
    fold_probs = []

    with torch.no_grad():
        for embeddings, _ in tqdm(test_loader, desc=f"Inference Fold {i + 1}"):
            embeddings = embeddings.to(device)
            logits = model(embeddings)
            # Sigmoid turns logits into probabilities; dim 1 is squeezed away
            # before moving the values to a NumPy array.
            probs = logits.sigmoid().squeeze(dim=1).cpu().numpy()
            fold_probs.extend(probs)

    fold_probs = np.asarray(fold_probs)
    # Hard decision at the 0.5 probability threshold.
    fold_preds = (fold_probs > 0.5).astype(int)

    bal_acc = balanced_accuracy_score(true_labels, fold_preds)
    auroc = roc_auc_score(true_labels, fold_probs)
    fold_bal_accs.append(bal_acc)
    fold_aurocs.append(auroc)

    print(f"\nFold {i + 1} - Balanced Accuracy: {bal_acc:.4f}, AUROC: {auroc:.4f}")

    fold_probs_dict[f"fold_{i + 1}"] = dict(
        probs=fold_probs,
        preds=fold_preds,
        true_labels=true_labels,
    )

# Summary
# Mean and standard deviation (np.std default, i.e. ddof=0) over the folds.
mean_bal_acc, std_bal_acc = np.mean(fold_bal_accs), np.std(fold_bal_accs)
mean_auroc, std_auroc = np.mean(fold_aurocs), np.std(fold_aurocs)

for line in (
    "\n--- Per-Fold Evaluation Summary (Hoptimus-0) ---",
    f"Balanced Accuracy: {mean_bal_acc:.4f} ± {std_bal_acc:.4f}",
    f"AUROC: {mean_auroc:.4f} ± {std_auroc:.4f}",
):
    print(line)

# Save predictions: the per-fold dict (probs, preds, true labels) is pickled
# as a single object.
output_path = "hoptimus0_amibr_test_predictions.pkl"
with open(output_path, "wb") as f:
    pickle.dump(fold_probs_dict, f)

print(f"\nSaved fold predictions and labels to: {output_path}")


Extracting embeddings: 100%|██████████| 826/826 [00:15<00:00, 54.54it/s]
  model.load_state_dict(torch.load(path, map_location=device))
Inference Fold 1: 100%|██████████| 52/52 [00:00<00:00, 159.97it/s]



Fold 1 - Balanced Accuracy: 0.6577, AUROC: 0.6983


Inference Fold 2: 100%|██████████| 52/52 [00:00<00:00, 185.87it/s]



Fold 2 - Balanced Accuracy: 0.6402, AUROC: 0.6997


Inference Fold 3: 100%|██████████| 52/52 [00:00<00:00, 197.20it/s]



Fold 3 - Balanced Accuracy: 0.6283, AUROC: 0.6827


Inference Fold 4: 100%|██████████| 52/52 [00:00<00:00, 193.52it/s]



Fold 4 - Balanced Accuracy: 0.6107, AUROC: 0.6929


Inference Fold 5: 100%|██████████| 52/52 [00:00<00:00, 186.71it/s]


Fold 5 - Balanced Accuracy: 0.6490, AUROC: 0.6895

--- Per-Fold Evaluation Summary (Hoptimus-0) ---
Balanced Accuracy: 0.6372 ± 0.0164
AUROC: 0.6926 ± 0.0062

Saved fold predictions and labels to: hoptimus0_amibr_test_predictions.pkl





## AtNorM-Br

In [None]:
import os
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
from tqdm import tqdm
import numpy as np
import pandas as pd
from sklearn.metrics import (
    balanced_accuracy_score, roc_auc_score
)
from huggingface_hub import login
import timm
import pickle

# Device setup: use the GPU when CUDA is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hugging Face login
# The token is read from the HF_TOKEN environment variable so the secret does
# not have to be pasted into the notebook. The placeholder is kept as the
# fallback, so replacing it by hand still works exactly as before.
hf_token = os.environ.get("HF_TOKEN", "your_huggingface_token_here")
login(token=hf_token)

# Load pretrained Hoptimus-0 model and switch it to evaluation mode.
hoptimus_model = timm.create_model(
    "hf-hub:bioptimus/H-optimus-0",
    pretrained=True,
    init_values=1e-5,
    dynamic_img_size=False,
)
hoptimus_model.eval().to(device)

# Hoptimus-0 transform: PIL image -> tensor, then per-channel normalisation
# with the mean/std constants below.
hoptimus_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.707223, 0.578729, 0.703617), std=(0.211883, 0.230117, 0.177517))
])

# Embedding extractor
def extract_embedding(img_path):
    """Return the Hoptimus-0 feature vector for a single image file.

    The image is decoded as RGB, resized to 224x224, passed through
    ``hoptimus_transform`` and fed to ``hoptimus_model`` as a batch of one.

    Args:
        img_path: Path of the image file to embed.

    Returns:
        A float32 CPU tensor: the model output with the batch dimension removed.
    """
    # Close the file handle deterministically; a bare Image.open() can leave
    # it open (e.g. for multi-frame TIFFs) until garbage collection.
    with Image.open(img_path) as img:
        image = img.convert("RGB").resize((224, 224))
    tensor = hoptimus_transform(image).unsqueeze(0).to(device)
    # Mixed precision is only meaningful on the GPU; disable it explicitly on
    # the CPU fallback instead of relying on autocast's warn-and-disable path.
    use_amp = device.type == "cuda"
    with torch.inference_mode(), torch.autocast(
        device_type="cuda", dtype=torch.float16, enabled=use_amp
    ):
        features = hoptimus_model(tensor)
    # Pin the dtype to float32 so the embedding always matches the default
    # (float32) nn.Linear layers it is later fed to, whatever autocast returned.
    return features.squeeze(0).float().cpu()

# Dataset class
class InferenceDataset(Dataset):
    """Dataset of pre-computed embeddings paired with their labels.

    Every image is embedded once, eagerly, in the constructor; indexing
    afterwards only reads from the in-memory lists.
    """

    def __init__(self, image_paths, labels):
        """Embed each path in ``image_paths`` and keep ``labels`` alongside."""
        cached = []
        for path in tqdm(image_paths, desc="Extracting embeddings"):
            cached.append(extract_embedding(path))
        self.embeddings = cached
        self.labels = labels

    def __len__(self):
        """Number of cached embeddings."""
        return len(self.embeddings)

    def __getitem__(self, idx):
        """Return the ``(embedding, label)`` pair stored at position ``idx``."""
        embedding = self.embeddings[idx]
        label = self.labels[idx]
        return embedding, label

# Classifier definition
class HoptimusBinaryClassifier(nn.Module):
    """Small MLP head: 1536 inputs -> 512 hidden units -> one raw logit.

    No sigmoid is applied here; callers receive the unnormalised logit.
    """

    def __init__(self):
        super().__init__()
        # Layer order (and therefore the Sequential indices 0-3) is kept so
        # the state_dict keys remain "classifier.<index>.*".
        layers = [
            nn.Linear(in_features=1536, out_features=512),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=512, out_features=1),
        ]
        self.classifier = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the classifier stack and return the logits."""
        logits = self.classifier(x)
        return logits

# Load test dataset
# Each class lives in its own sub-folder of test_root; the folder name maps to
# the integer label in class_map.
test_root = "/data/MELBA-AmiBr/Datasets_Stratified/AtNorM-Br"
class_map = {"Atypical": 0, "Normal": 1}
image_paths, labels = [], []

for class_name, label_val in class_map.items():
    class_folder = os.path.join(test_root, class_name)
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # sample order (and so the order of the saved predictions) reproducible.
    for fname in sorted(os.listdir(class_folder)):
        if fname.lower().endswith(('.jpg', '.jpeg', '.png', '.tif')):
            image_paths.append(os.path.join(class_folder, fname))
            labels.append(label_val)

# Fail early with a clear message instead of deep inside the metric code.
if not image_paths:
    raise FileNotFoundError(f"No test images found under {test_root}")

# Prepare dataset and loader (shuffle stays off so predictions line up with
# the order of `labels`).
test_dataset = InferenceDataset(image_paths, labels)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=8, pin_memory=True)

# Load models: one classifier head per cross-validation fold.
num_folds = 5
model_paths = [f"hoptimus0_linear_probe_fold_{i + 1}_best.pth" for i in range(num_folds)]
models = []

for path in model_paths:
    model = HoptimusBinaryClassifier().to(device)
    # weights_only=True restricts unpickling to tensors and plain containers,
    # which is all a state_dict needs; it avoids executing arbitrary pickled
    # code and silences the torch.load FutureWarning.
    state_dict = torch.load(path, map_location=device, weights_only=True)
    model.load_state_dict(state_dict)
    model.eval()
    models.append(model)

# Inference
# Every fold model scores the same test loader; per-fold metrics and raw
# outputs are collected for the summary and for the pickle written later.
true_labels = np.array(test_dataset.labels)
fold_bal_accs = []
fold_aurocs = []
fold_probs_dict = {}

for i, model in enumerate(models):
    fold_probs = []

    with torch.no_grad():
        for embeddings, _ in tqdm(test_loader, desc=f"Inference Fold {i + 1}"):
            embeddings = embeddings.to(device)
            logits = model(embeddings)
            # Sigmoid turns logits into probabilities; dim 1 is squeezed away
            # before moving the values to a NumPy array.
            probs = logits.sigmoid().squeeze(dim=1).cpu().numpy()
            fold_probs.extend(probs)

    fold_probs = np.asarray(fold_probs)
    # Hard decision at the 0.5 probability threshold.
    fold_preds = (fold_probs > 0.5).astype(int)

    bal_acc = balanced_accuracy_score(true_labels, fold_preds)
    auroc = roc_auc_score(true_labels, fold_probs)
    fold_bal_accs.append(bal_acc)
    fold_aurocs.append(auroc)

    print(f"\nFold {i + 1} - Balanced Accuracy: {bal_acc:.4f}, AUROC: {auroc:.4f}")

    fold_probs_dict[f"fold_{i + 1}"] = dict(
        probs=fold_probs,
        preds=fold_preds,
        true_labels=true_labels,
    )

# Summary
# Mean and standard deviation (np.std default, i.e. ddof=0) over the folds.
mean_bal_acc, std_bal_acc = np.mean(fold_bal_accs), np.std(fold_bal_accs)
mean_auroc, std_auroc = np.mean(fold_aurocs), np.std(fold_aurocs)

for line in (
    "\n--- Per-Fold Evaluation Summary (Hoptimus-0) ---",
    f"Balanced Accuracy: {mean_bal_acc:.4f} ± {std_bal_acc:.4f}",
    f"AUROC: {mean_auroc:.4f} ± {std_auroc:.4f}",
):
    print(line)

# Save predictions: the per-fold dict (probs, preds, true labels) is pickled
# as a single object.
output_path = "hoptimus0_atnorm-br_test_predictions.pkl"
with open(output_path, "wb") as f:
    pickle.dump(fold_probs_dict, f)

print(f"\nSaved fold predictions and labels to: {output_path}")


Extracting embeddings: 100%|██████████| 746/746 [00:13<00:00, 55.40it/s]
  model.load_state_dict(torch.load(path, map_location=device))
Inference Fold 1: 100%|██████████| 47/47 [00:00<00:00, 49.59it/s]



Fold 1 - Balanced Accuracy: 0.5839, AUROC: 0.6492


Inference Fold 2: 100%|██████████| 47/47 [00:01<00:00, 46.70it/s]



Fold 2 - Balanced Accuracy: 0.6118, AUROC: 0.6921


Inference Fold 3: 100%|██████████| 47/47 [00:01<00:00, 43.42it/s]



Fold 3 - Balanced Accuracy: 0.6242, AUROC: 0.7083


Inference Fold 4: 100%|██████████| 47/47 [00:01<00:00, 44.03it/s]



Fold 4 - Balanced Accuracy: 0.6383, AUROC: 0.7085


Inference Fold 5: 100%|██████████| 47/47 [00:01<00:00, 45.41it/s]


Fold 5 - Balanced Accuracy: 0.6029, AUROC: 0.6935

--- Per-Fold Evaluation Summary (Hoptimus-0) ---
Balanced Accuracy: 0.6122 ± 0.0185
AUROC: 0.6903 ± 0.0217

Saved fold predictions and labels to: hoptimus0_atnorm-br_test_predictions.pkl





## AtNorM-MD

In [None]:
import os
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
from tqdm import tqdm
import numpy as np
import pandas as pd
from sklearn.metrics import (
    balanced_accuracy_score, roc_auc_score
)
from huggingface_hub import login
import timm
import pickle

# Device setup: use the GPU when CUDA is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hugging Face login
# The token is read from the HF_TOKEN environment variable so the secret does
# not have to be pasted into the notebook. The placeholder is kept as the
# fallback, so replacing it by hand still works exactly as before.
hf_token = os.environ.get("HF_TOKEN", "your_huggingface_token_here")
login(token=hf_token)

# Load pretrained Hoptimus-0 model and switch it to evaluation mode.
hoptimus_model = timm.create_model(
    "hf-hub:bioptimus/H-optimus-0",
    pretrained=True,
    init_values=1e-5,
    dynamic_img_size=False,
)
hoptimus_model.eval().to(device)

# Hoptimus-0 transform: PIL image -> tensor, then per-channel normalisation
# with the mean/std constants below.
hoptimus_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.707223, 0.578729, 0.703617), std=(0.211883, 0.230117, 0.177517))
])

# Embedding extractor
def extract_embedding(img_path):
    """Return the Hoptimus-0 feature vector for a single image file.

    The image is decoded as RGB, resized to 224x224, passed through
    ``hoptimus_transform`` and fed to ``hoptimus_model`` as a batch of one.

    Args:
        img_path: Path of the image file to embed.

    Returns:
        A float32 CPU tensor: the model output with the batch dimension removed.
    """
    # Close the file handle deterministically; a bare Image.open() can leave
    # it open (e.g. for multi-frame TIFFs) until garbage collection.
    with Image.open(img_path) as img:
        image = img.convert("RGB").resize((224, 224))
    tensor = hoptimus_transform(image).unsqueeze(0).to(device)
    # Mixed precision is only meaningful on the GPU; disable it explicitly on
    # the CPU fallback instead of relying on autocast's warn-and-disable path.
    use_amp = device.type == "cuda"
    with torch.inference_mode(), torch.autocast(
        device_type="cuda", dtype=torch.float16, enabled=use_amp
    ):
        features = hoptimus_model(tensor)
    # Pin the dtype to float32 so the embedding always matches the default
    # (float32) nn.Linear layers it is later fed to, whatever autocast returned.
    return features.squeeze(0).float().cpu()

# Dataset class
class InferenceDataset(Dataset):
    """Dataset of pre-computed embeddings paired with their labels.

    Every image is embedded once, eagerly, in the constructor; indexing
    afterwards only reads from the in-memory lists.
    """

    def __init__(self, image_paths, labels):
        """Embed each path in ``image_paths`` and keep ``labels`` alongside."""
        cached = []
        for path in tqdm(image_paths, desc="Extracting embeddings"):
            cached.append(extract_embedding(path))
        self.embeddings = cached
        self.labels = labels

    def __len__(self):
        """Number of cached embeddings."""
        return len(self.embeddings)

    def __getitem__(self, idx):
        """Return the ``(embedding, label)`` pair stored at position ``idx``."""
        embedding = self.embeddings[idx]
        label = self.labels[idx]
        return embedding, label

# Classifier definition
class HoptimusBinaryClassifier(nn.Module):
    """Small MLP head: 1536 inputs -> 512 hidden units -> one raw logit.

    No sigmoid is applied here; callers receive the unnormalised logit.
    """

    def __init__(self):
        super().__init__()
        # Layer order (and therefore the Sequential indices 0-3) is kept so
        # the state_dict keys remain "classifier.<index>.*".
        layers = [
            nn.Linear(in_features=1536, out_features=512),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=512, out_features=1),
        ]
        self.classifier = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the classifier stack and return the logits."""
        logits = self.classifier(x)
        return logits

# Load test dataset
# Each class lives in its own sub-folder of test_root; the folder name maps to
# the integer label in class_map.
test_root = "/data/MELBA-AmiBr/Datasets_Stratified/AtNorM-MD"
class_map = {"Atypical": 0, "Normal": 1}
image_paths, labels = [], []

for class_name, label_val in class_map.items():
    class_folder = os.path.join(test_root, class_name)
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # sample order (and so the order of the saved predictions) reproducible.
    for fname in sorted(os.listdir(class_folder)):
        if fname.lower().endswith(('.jpg', '.jpeg', '.png', '.tif')):
            image_paths.append(os.path.join(class_folder, fname))
            labels.append(label_val)

# Fail early with a clear message instead of deep inside the metric code.
if not image_paths:
    raise FileNotFoundError(f"No test images found under {test_root}")

# Prepare dataset and loader (shuffle stays off so predictions line up with
# the order of `labels`).
test_dataset = InferenceDataset(image_paths, labels)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=8, pin_memory=True)

# Load models: one classifier head per cross-validation fold.
num_folds = 5
model_paths = [f"hoptimus0_linear_probe_fold_{i + 1}_best.pth" for i in range(num_folds)]
models = []

for path in model_paths:
    model = HoptimusBinaryClassifier().to(device)
    # weights_only=True restricts unpickling to tensors and plain containers,
    # which is all a state_dict needs; it avoids executing arbitrary pickled
    # code and silences the torch.load FutureWarning.
    state_dict = torch.load(path, map_location=device, weights_only=True)
    model.load_state_dict(state_dict)
    model.eval()
    models.append(model)

# Inference
# Every fold model scores the same test loader; per-fold metrics and raw
# outputs are collected for the summary and for the pickle written later.
true_labels = np.array(test_dataset.labels)
fold_bal_accs = []
fold_aurocs = []
fold_probs_dict = {}

for i, model in enumerate(models):
    fold_probs = []

    with torch.no_grad():
        for embeddings, _ in tqdm(test_loader, desc=f"Inference Fold {i + 1}"):
            embeddings = embeddings.to(device)
            logits = model(embeddings)
            # Sigmoid turns logits into probabilities; dim 1 is squeezed away
            # before moving the values to a NumPy array.
            probs = logits.sigmoid().squeeze(dim=1).cpu().numpy()
            fold_probs.extend(probs)

    fold_probs = np.asarray(fold_probs)
    # Hard decision at the 0.5 probability threshold.
    fold_preds = (fold_probs > 0.5).astype(int)

    bal_acc = balanced_accuracy_score(true_labels, fold_preds)
    auroc = roc_auc_score(true_labels, fold_probs)
    fold_bal_accs.append(bal_acc)
    fold_aurocs.append(auroc)

    print(f"\nFold {i + 1} - Balanced Accuracy: {bal_acc:.4f}, AUROC: {auroc:.4f}")

    fold_probs_dict[f"fold_{i + 1}"] = dict(
        probs=fold_probs,
        preds=fold_preds,
        true_labels=true_labels,
    )

# Summary
# Mean and standard deviation (np.std default, i.e. ddof=0) over the folds.
mean_bal_acc, std_bal_acc = np.mean(fold_bal_accs), np.std(fold_bal_accs)
mean_auroc, std_auroc = np.mean(fold_aurocs), np.std(fold_aurocs)

for line in (
    "\n--- Per-Fold Evaluation Summary (Hoptimus-0) ---",
    f"Balanced Accuracy: {mean_bal_acc:.4f} ± {std_bal_acc:.4f}",
    f"AUROC: {mean_auroc:.4f} ± {std_auroc:.4f}",
):
    print(line)

# Save predictions: the per-fold dict (probs, preds, true labels) is pickled
# as a single object.
output_path = "hoptimus0_atnorm-md_test_predictions.pkl"
with open(output_path, "wb") as f:
    pickle.dump(fold_probs_dict, f)

print(f"\nSaved fold predictions and labels to: {output_path}")


Extracting embeddings: 100%|██████████| 2107/2107 [00:37<00:00, 56.55it/s]
  model.load_state_dict(torch.load(path, map_location=device))
Inference Fold 1: 100%|██████████| 132/132 [00:01<00:00, 118.75it/s]



Fold 1 - Balanced Accuracy: 0.4975, AUROC: 0.5313


Inference Fold 2: 100%|██████████| 132/132 [00:01<00:00, 117.46it/s]



Fold 2 - Balanced Accuracy: 0.5345, AUROC: 0.5556


Inference Fold 3: 100%|██████████| 132/132 [00:01<00:00, 118.07it/s]



Fold 3 - Balanced Accuracy: 0.5686, AUROC: 0.6126


Inference Fold 4: 100%|██████████| 132/132 [00:01<00:00, 116.80it/s]



Fold 4 - Balanced Accuracy: 0.5936, AUROC: 0.6348


Inference Fold 5: 100%|██████████| 132/132 [00:01<00:00, 120.47it/s]


Fold 5 - Balanced Accuracy: 0.6072, AUROC: 0.6334

--- Per-Fold Evaluation Summary (Hoptimus-0) ---
Balanced Accuracy: 0.5603 ± 0.0400
AUROC: 0.5935 ± 0.0423

Saved fold predictions and labels to: hoptimus0_atnorm-md_test_predictions.pkl



