## AMi-Br

In [None]:
import os
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
from tqdm import tqdm
import numpy as np
import pandas as pd
from sklearn.metrics import balanced_accuracy_score, roc_auc_score
from huggingface_hub import login
import timm
from peft import get_peft_model, LoraConfig
import pickle

# Device setup: use the GPU when CUDA is available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hugging Face login
# The access token is read from the HF_TOKEN environment variable so that no
# secret has to be pasted into (and accidentally committed with) the notebook.
# When the variable is unset, token=None lets `login` prompt for the token.
login(token=os.environ.get("HF_TOKEN"))

# LoRA configuration
# NOTE(review): presumably this has to mirror the configuration used during
# fine-tuning so that the per-fold checkpoints match the wrapped model's
# parameter names -- confirm against the training notebook.
lora_config = LoraConfig(
    r=8,                                           # rank of the low-rank update
    lora_alpha=16,                                 # LoRA scaling factor
    target_modules=["qkv", "proj", "fc1", "fc2"],  # layers that receive adapters
    lora_dropout=0.3,
    bias="none",
    modules_to_save=["head"],                      # head is stored in full, not as an adapter
)

# Image transform for test set
# With an int argument, Resize scales the *shorter* edge to 224 px and keeps
# the aspect ratio; it does not force a 224x224 output.
# NOTE(review): presumably the test patches are square -- confirm, otherwise
# batching and the fixed-size model input would break.
# NOTE(review): the mean/std look like the H-optimus model-card statistics --
# confirm they match what was used for training.
test_transform = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.707223, 0.578729, 0.703617),
                         std=(0.211883, 0.230117, 0.177517))
])

# Dataset for inference
class TestImageDataset(Dataset):
    """Map-style dataset yielding ``(image, label)`` pairs for inference.

    Args:
        image_paths: Sequence of image file paths, index-aligned with ``labels``.
        labels: Sequence of numeric class labels, one per image path.
        transform: Callable applied to the RGB PIL image; its return value is
            used unchanged as the first element of each sample.
    """

    def __init__(self, image_paths, labels, transform):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of samples (one per image path)."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load, convert and transform the sample at position ``idx``.

        Returns:
            Tuple ``(image, label)`` where ``image`` is the output of
            ``transform`` and ``label`` is a float32 tensor with a leading
            dimension of size 1 added by ``unsqueeze(0)``.
        """
        # Open inside a context manager so the underlying file handle is
        # closed deterministically instead of whenever the garbage collector
        # runs; `convert` loads the pixel data and returns an independent copy.
        with Image.open(self.image_paths[idx]) as raw_image:
            image = raw_image.convert("RGB")
        image = self.transform(image)
        label = torch.tensor(self.labels[idx], dtype=torch.float32).unsqueeze(0)
        return image, label

# Load test data
# Expected layout: <test_root>/<class_name>/<image file>; the folder name
# determines the integer label through `class_map`.
test_root = "/data/MELBA-AmiBr/Datasets_Stratified/AMi-Br/Test"
class_map = {"Atypical": 0, "Normal": 1}
image_extensions = (".jpg", ".jpeg", ".png", ".tif")
image_paths, labels = [], []

for class_name, label in class_map.items():
    class_dir = os.path.join(test_root, class_name)
    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order (and therefore the saved per-image predictions) reproducible.
    for fname in sorted(os.listdir(class_dir)):
        if fname.lower().endswith(image_extensions):
            image_paths.append(os.path.join(class_dir, fname))
            labels.append(label)

# Fail early with a clear message instead of an obscure metric error later on.
if not image_paths:
    raise FileNotFoundError(f"No test images found under {test_root}")

# Create dataset and loader
# shuffle=False keeps the prediction order aligned with `labels`; pinned host
# memory is only requested when the batches are actually copied to a GPU.
test_dataset = TestImageDataset(image_paths, labels, test_transform)
test_loader = DataLoader(
    test_dataset,
    batch_size=4,
    shuffle=False,
    num_workers=2,
    pin_memory=(device.type == "cuda"),
)

# Inference loop per fold to avoid OOM
# Only one fold's model is resident at a time: it is built, evaluated and
# released before the next checkpoint is loaded.
true_labels = np.array(labels)
fold_bal_accs, fold_aurocs = [], []
fold_probs_dict = {}
num_folds = 5

# Mixed precision is enabled on CUDA only. `device` may have fallen back to
# the CPU, in which case a hard-coded device_type="cuda" would not apply.
use_amp = device.type == "cuda"
amp_dtype = torch.float16 if use_amp else torch.bfloat16

for i in range(num_folds):
    print(f"\n--- Inference: Fold {i + 1} ---")

    # Load and prepare model
    # The backbone is created without a classifier (num_classes=0) and gets a
    # single-logit linear head before being wrapped with the LoRA adapters.
    # NOTE(review): init_values / dynamic_img_size presumably follow the
    # H-optimus-1 model card -- confirm.
    model_path = f"hoptimus1_lora_fold_{i + 1}_best.pth"
    base_model = timm.create_model(
        "hf-hub:bioptimus/H-optimus-1",
        pretrained=True,
        num_classes=0,
        init_values=1e-5,
        dynamic_img_size=False
    )
    base_model.head = nn.Linear(base_model.num_features, 1)
    base_model = base_model.to(device)

    model = get_peft_model(base_model, lora_config)
    # weights_only=True restricts unpickling to tensors and plain containers,
    # which is all a state dict needs; it avoids executing arbitrary pickled
    # code and silences the torch.load FutureWarning.
    state_dict = torch.load(model_path, map_location=device, weights_only=True)
    model.load_state_dict(state_dict)
    model.eval()

    fold_probs = []

    with torch.inference_mode(), torch.autocast(
        device_type=device.type, dtype=amp_dtype, enabled=use_amp
    ):
        for images_batch, _ in tqdm(test_loader, desc=f"Evaluating Fold {i + 1}"):
            images_batch = images_batch.to(device, non_blocking=True)
            logits = model(images_batch)
            # Up-cast before the sigmoid: in float16 the probabilities are
            # coarsely quantised near 0.5, which creates ties for AUROC and
            # can flip predictions at the 0.5 threshold.
            probs = torch.sigmoid(logits.float()).squeeze(1).cpu().numpy()
            fold_probs.extend(probs)

    fold_probs = np.array(fold_probs)
    # Probabilities above 0.5 are assigned label 1, everything else label 0.
    fold_preds = (fold_probs > 0.5).astype(int)

    bal_acc = balanced_accuracy_score(true_labels, fold_preds)
    auroc = roc_auc_score(true_labels, fold_probs)

    fold_bal_accs.append(bal_acc)
    fold_aurocs.append(auroc)

    print(f"Fold {i + 1} - Balanced Accuracy: {bal_acc:.4f}, AUROC: {auroc:.4f}")

    fold_probs_dict[f"fold_{i + 1}"] = {
        "probs": fold_probs,
        "preds": fold_preds,
        "true_labels": true_labels
    }

    # Clean up GPU memory
    # Drop both references so the weights can be freed before the next fold.
    del model
    del base_model
    torch.cuda.empty_cache()

# Summary
# Aggregate the per-fold metrics; np.std is called with its default ddof=0,
# i.e. the population standard deviation over the folds.
mean_bal_acc, std_bal_acc = np.mean(fold_bal_accs), np.std(fold_bal_accs)
mean_auroc, std_auroc = np.mean(fold_aurocs), np.std(fold_aurocs)

summary_rows = (
    ("Balanced Accuracy", mean_bal_acc, std_bal_acc),
    ("AUROC", mean_auroc, std_auroc),
)

print("\n--- Final Evaluation Summary (Hoptimus-1 LoRA) ---")
for metric_name, metric_mean, metric_std in summary_rows:
    print(f"{metric_name}: {metric_mean:.4f} ± {metric_std:.4f}")

# Save results
# One entry per fold ("fold_1", ...), each holding probs, preds and true_labels.
output_path = "hoptimus1_amibr_test_predictions.pkl"
with open(output_path, "wb") as pkl_file:
    pickle.dump(fold_probs_dict, pkl_file)

print(f"Saved predictions to: {output_path}")



--- Inference: Fold 1 ---


  model.load_state_dict(torch.load(model_path, map_location=device))
Evaluating Fold 1: 100%|██████████| 207/207 [00:11<00:00, 18.80it/s]


Fold 1 - Balanced Accuracy: 0.7875, AUROC: 0.8480

--- Inference: Fold 2 ---


  model.load_state_dict(torch.load(model_path, map_location=device))
Evaluating Fold 2: 100%|██████████| 207/207 [00:17<00:00, 11.98it/s]


Fold 2 - Balanced Accuracy: 0.7854, AUROC: 0.8609

--- Inference: Fold 3 ---


  model.load_state_dict(torch.load(model_path, map_location=device))
Evaluating Fold 3: 100%|██████████| 207/207 [00:10<00:00, 20.67it/s]


Fold 3 - Balanced Accuracy: 0.7622, AUROC: 0.8579

--- Inference: Fold 4 ---


  model.load_state_dict(torch.load(model_path, map_location=device))
Evaluating Fold 4: 100%|██████████| 207/207 [00:16<00:00, 12.54it/s]


Fold 4 - Balanced Accuracy: 0.7824, AUROC: 0.8657

--- Inference: Fold 5 ---


  model.load_state_dict(torch.load(model_path, map_location=device))
Evaluating Fold 5: 100%|██████████| 207/207 [00:17<00:00, 12.07it/s]


Fold 5 - Balanced Accuracy: 0.7638, AUROC: 0.8730

--- Final Evaluation Summary (Hoptimus-1 LoRA) ---
Balanced Accuracy: 0.7762 ± 0.0110
AUROC: 0.8611 ± 0.0083
Saved predictions to: hoptimus1_amibr_test_predictions.pkl


## AtNorM-Br

In [None]:
import os
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
from tqdm import tqdm
import numpy as np
import pandas as pd
from sklearn.metrics import balanced_accuracy_score, roc_auc_score
from huggingface_hub import login
import timm
from peft import get_peft_model, LoraConfig
import pickle

# Device setup: use the GPU when CUDA is available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hugging Face login
# The access token is read from the HF_TOKEN environment variable so that no
# secret has to be pasted into (and accidentally committed with) the notebook.
# When the variable is unset, token=None lets `login` prompt for the token.
login(token=os.environ.get("HF_TOKEN"))

# LoRA configuration
# NOTE(review): presumably this has to mirror the configuration used during
# fine-tuning so that the per-fold checkpoints match the wrapped model's
# parameter names -- confirm against the training notebook.
lora_config = LoraConfig(
    r=8,                                           # rank of the low-rank update
    lora_alpha=16,                                 # LoRA scaling factor
    target_modules=["qkv", "proj", "fc1", "fc2"],  # layers that receive adapters
    lora_dropout=0.3,
    bias="none",
    modules_to_save=["head"],                      # head is stored in full, not as an adapter
)

# Image transform for test set
# With an int argument, Resize scales the *shorter* edge to 224 px and keeps
# the aspect ratio; it does not force a 224x224 output.
# NOTE(review): presumably the test patches are square -- confirm, otherwise
# batching and the fixed-size model input would break.
# NOTE(review): the mean/std look like the H-optimus model-card statistics --
# confirm they match what was used for training.
test_transform = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.707223, 0.578729, 0.703617),
                         std=(0.211883, 0.230117, 0.177517))
])

# Dataset for inference
class TestImageDataset(Dataset):
    """Map-style dataset yielding ``(image, label)`` pairs for inference.

    Args:
        image_paths: Sequence of image file paths, index-aligned with ``labels``.
        labels: Sequence of numeric class labels, one per image path.
        transform: Callable applied to the RGB PIL image; its return value is
            used unchanged as the first element of each sample.
    """

    def __init__(self, image_paths, labels, transform):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of samples (one per image path)."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load, convert and transform the sample at position ``idx``.

        Returns:
            Tuple ``(image, label)`` where ``image`` is the output of
            ``transform`` and ``label`` is a float32 tensor with a leading
            dimension of size 1 added by ``unsqueeze(0)``.
        """
        # Open inside a context manager so the underlying file handle is
        # closed deterministically instead of whenever the garbage collector
        # runs; `convert` loads the pixel data and returns an independent copy.
        with Image.open(self.image_paths[idx]) as raw_image:
            image = raw_image.convert("RGB")
        image = self.transform(image)
        label = torch.tensor(self.labels[idx], dtype=torch.float32).unsqueeze(0)
        return image, label

# Load test data
# Expected layout: <test_root>/<class_name>/<image file>; the folder name
# determines the integer label through `class_map`.
test_root = "/data/MELBA-AmiBr/Datasets_Stratified/AtNorM-Br"
class_map = {"Atypical": 0, "Normal": 1}
image_extensions = (".jpg", ".jpeg", ".png", ".tif")
image_paths, labels = [], []

for class_name, label in class_map.items():
    class_dir = os.path.join(test_root, class_name)
    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order (and therefore the saved per-image predictions) reproducible.
    for fname in sorted(os.listdir(class_dir)):
        if fname.lower().endswith(image_extensions):
            image_paths.append(os.path.join(class_dir, fname))
            labels.append(label)

# Fail early with a clear message instead of an obscure metric error later on.
if not image_paths:
    raise FileNotFoundError(f"No test images found under {test_root}")

# Create dataset and loader
# shuffle=False keeps the prediction order aligned with `labels`; pinned host
# memory is only requested when the batches are actually copied to a GPU.
test_dataset = TestImageDataset(image_paths, labels, test_transform)
test_loader = DataLoader(
    test_dataset,
    batch_size=4,
    shuffle=False,
    num_workers=2,
    pin_memory=(device.type == "cuda"),
)

# Inference loop per fold to avoid OOM
# Only one fold's model is resident at a time: it is built, evaluated and
# released before the next checkpoint is loaded.
true_labels = np.array(labels)
fold_bal_accs, fold_aurocs = [], []
fold_probs_dict = {}
num_folds = 5

# Mixed precision is enabled on CUDA only. `device` may have fallen back to
# the CPU, in which case a hard-coded device_type="cuda" would not apply.
use_amp = device.type == "cuda"
amp_dtype = torch.float16 if use_amp else torch.bfloat16

for i in range(num_folds):
    print(f"\n--- Inference: Fold {i + 1} ---")

    # Load and prepare model
    # The backbone is created without a classifier (num_classes=0) and gets a
    # single-logit linear head before being wrapped with the LoRA adapters.
    # NOTE(review): init_values / dynamic_img_size presumably follow the
    # H-optimus-1 model card -- confirm.
    model_path = f"hoptimus1_lora_fold_{i + 1}_best.pth"
    base_model = timm.create_model(
        "hf-hub:bioptimus/H-optimus-1",
        pretrained=True,
        num_classes=0,
        init_values=1e-5,
        dynamic_img_size=False
    )
    base_model.head = nn.Linear(base_model.num_features, 1)
    base_model = base_model.to(device)

    model = get_peft_model(base_model, lora_config)
    # weights_only=True restricts unpickling to tensors and plain containers,
    # which is all a state dict needs; it avoids executing arbitrary pickled
    # code and silences the torch.load FutureWarning.
    state_dict = torch.load(model_path, map_location=device, weights_only=True)
    model.load_state_dict(state_dict)
    model.eval()

    fold_probs = []

    with torch.inference_mode(), torch.autocast(
        device_type=device.type, dtype=amp_dtype, enabled=use_amp
    ):
        for images_batch, _ in tqdm(test_loader, desc=f"Evaluating Fold {i + 1}"):
            images_batch = images_batch.to(device, non_blocking=True)
            logits = model(images_batch)
            # Up-cast before the sigmoid: in float16 the probabilities are
            # coarsely quantised near 0.5, which creates ties for AUROC and
            # can flip predictions at the 0.5 threshold.
            probs = torch.sigmoid(logits.float()).squeeze(1).cpu().numpy()
            fold_probs.extend(probs)

    fold_probs = np.array(fold_probs)
    # Probabilities above 0.5 are assigned label 1, everything else label 0.
    fold_preds = (fold_probs > 0.5).astype(int)

    bal_acc = balanced_accuracy_score(true_labels, fold_preds)
    auroc = roc_auc_score(true_labels, fold_probs)

    fold_bal_accs.append(bal_acc)
    fold_aurocs.append(auroc)

    print(f"Fold {i + 1} - Balanced Accuracy: {bal_acc:.4f}, AUROC: {auroc:.4f}")

    fold_probs_dict[f"fold_{i + 1}"] = {
        "probs": fold_probs,
        "preds": fold_preds,
        "true_labels": true_labels
    }

    # Clean up GPU memory
    # Drop both references so the weights can be freed before the next fold.
    del model
    del base_model
    torch.cuda.empty_cache()

# Summary
# Aggregate the per-fold metrics; np.std is called with its default ddof=0,
# i.e. the population standard deviation over the folds.
mean_bal_acc, std_bal_acc = np.mean(fold_bal_accs), np.std(fold_bal_accs)
mean_auroc, std_auroc = np.mean(fold_aurocs), np.std(fold_aurocs)

summary_rows = (
    ("Balanced Accuracy", mean_bal_acc, std_bal_acc),
    ("AUROC", mean_auroc, std_auroc),
)

print("\n--- Final Evaluation Summary (Hoptimus-1 LoRA) ---")
for metric_name, metric_mean, metric_std in summary_rows:
    print(f"{metric_name}: {metric_mean:.4f} ± {metric_std:.4f}")

# Save results
# One entry per fold ("fold_1", ...), each holding probs, preds and true_labels.
output_path = "hoptimus1_atnorm-br_test_predictions.pkl"
with open(output_path, "wb") as pkl_file:
    pickle.dump(fold_probs_dict, pkl_file)

print(f"Saved predictions to: {output_path}")



--- Inference: Fold 1 ---


  model.load_state_dict(torch.load(model_path, map_location=device))
Evaluating Fold 1: 100%|██████████| 187/187 [00:07<00:00, 25.02it/s]


Fold 1 - Balanced Accuracy: 0.7234, AUROC: 0.8438

--- Inference: Fold 2 ---


  model.load_state_dict(torch.load(model_path, map_location=device))
Evaluating Fold 2: 100%|██████████| 187/187 [00:07<00:00, 25.08it/s]


Fold 2 - Balanced Accuracy: 0.7845, AUROC: 0.8508

--- Inference: Fold 3 ---


  model.load_state_dict(torch.load(model_path, map_location=device))
Evaluating Fold 3: 100%|██████████| 187/187 [00:07<00:00, 25.11it/s]


Fold 3 - Balanced Accuracy: 0.7572, AUROC: 0.8516

--- Inference: Fold 4 ---


  model.load_state_dict(torch.load(model_path, map_location=device))
Evaluating Fold 4: 100%|██████████| 187/187 [00:07<00:00, 25.10it/s]


Fold 4 - Balanced Accuracy: 0.8112, AUROC: 0.8713

--- Inference: Fold 5 ---


  model.load_state_dict(torch.load(model_path, map_location=device))
Evaluating Fold 5: 100%|██████████| 187/187 [00:07<00:00, 25.02it/s]


Fold 5 - Balanced Accuracy: 0.7529, AUROC: 0.8442

--- Final Evaluation Summary (Hoptimus-1 LoRA) ---
Balanced Accuracy: 0.7658 ± 0.0298
AUROC: 0.8524 ± 0.0100
Saved predictions to: hoptimus1_atnorm-br_test_predictions.pkl


## AtNorM-MD

In [None]:
import os
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
from tqdm import tqdm
import numpy as np
import pandas as pd
from sklearn.metrics import balanced_accuracy_score, roc_auc_score
from huggingface_hub import login
import timm
from peft import get_peft_model, LoraConfig
import pickle

# Device setup: use the GPU when CUDA is available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hugging Face login
# The access token is read from the HF_TOKEN environment variable so that no
# secret has to be pasted into (and accidentally committed with) the notebook.
# When the variable is unset, token=None lets `login` prompt for the token.
login(token=os.environ.get("HF_TOKEN"))

# LoRA configuration
# NOTE(review): presumably this has to mirror the configuration used during
# fine-tuning so that the per-fold checkpoints match the wrapped model's
# parameter names -- confirm against the training notebook.
lora_config = LoraConfig(
    r=8,                                           # rank of the low-rank update
    lora_alpha=16,                                 # LoRA scaling factor
    target_modules=["qkv", "proj", "fc1", "fc2"],  # layers that receive adapters
    lora_dropout=0.3,
    bias="none",
    modules_to_save=["head"],                      # head is stored in full, not as an adapter
)

# Image transform for test set
# With an int argument, Resize scales the *shorter* edge to 224 px and keeps
# the aspect ratio; it does not force a 224x224 output.
# NOTE(review): presumably the test patches are square -- confirm, otherwise
# batching and the fixed-size model input would break.
# NOTE(review): the mean/std look like the H-optimus model-card statistics --
# confirm they match what was used for training.
test_transform = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.707223, 0.578729, 0.703617),
                         std=(0.211883, 0.230117, 0.177517))
])

# Dataset for inference
class TestImageDataset(Dataset):
    """Map-style dataset yielding ``(image, label)`` pairs for inference.

    Args:
        image_paths: Sequence of image file paths, index-aligned with ``labels``.
        labels: Sequence of numeric class labels, one per image path.
        transform: Callable applied to the RGB PIL image; its return value is
            used unchanged as the first element of each sample.
    """

    def __init__(self, image_paths, labels, transform):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of samples (one per image path)."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load, convert and transform the sample at position ``idx``.

        Returns:
            Tuple ``(image, label)`` where ``image`` is the output of
            ``transform`` and ``label`` is a float32 tensor with a leading
            dimension of size 1 added by ``unsqueeze(0)``.
        """
        # Open inside a context manager so the underlying file handle is
        # closed deterministically instead of whenever the garbage collector
        # runs; `convert` loads the pixel data and returns an independent copy.
        with Image.open(self.image_paths[idx]) as raw_image:
            image = raw_image.convert("RGB")
        image = self.transform(image)
        label = torch.tensor(self.labels[idx], dtype=torch.float32).unsqueeze(0)
        return image, label

# Load test data
# Expected layout: <test_root>/<class_name>/<image file>; the folder name
# determines the integer label through `class_map`.
test_root = "/data/MELBA-AmiBr/Datasets_Stratified/AtNorM-MD"
class_map = {"Atypical": 0, "Normal": 1}
image_extensions = (".jpg", ".jpeg", ".png", ".tif")
image_paths, labels = [], []

for class_name, label in class_map.items():
    class_dir = os.path.join(test_root, class_name)
    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order (and therefore the saved per-image predictions) reproducible.
    for fname in sorted(os.listdir(class_dir)):
        if fname.lower().endswith(image_extensions):
            image_paths.append(os.path.join(class_dir, fname))
            labels.append(label)

# Fail early with a clear message instead of an obscure metric error later on.
if not image_paths:
    raise FileNotFoundError(f"No test images found under {test_root}")

# Create dataset and loader
# shuffle=False keeps the prediction order aligned with `labels`; pinned host
# memory is only requested when the batches are actually copied to a GPU.
test_dataset = TestImageDataset(image_paths, labels, test_transform)
test_loader = DataLoader(
    test_dataset,
    batch_size=4,
    shuffle=False,
    num_workers=2,
    pin_memory=(device.type == "cuda"),
)

# Inference loop per fold to avoid OOM
# Only one fold's model is resident at a time: it is built, evaluated and
# released before the next checkpoint is loaded.
true_labels = np.array(labels)
fold_bal_accs, fold_aurocs = [], []
fold_probs_dict = {}
num_folds = 5

# Mixed precision is enabled on CUDA only. `device` may have fallen back to
# the CPU, in which case a hard-coded device_type="cuda" would not apply.
use_amp = device.type == "cuda"
amp_dtype = torch.float16 if use_amp else torch.bfloat16

for i in range(num_folds):
    print(f"\n--- Inference: Fold {i + 1} ---")

    # Load and prepare model
    # The backbone is created without a classifier (num_classes=0) and gets a
    # single-logit linear head before being wrapped with the LoRA adapters.
    # NOTE(review): init_values / dynamic_img_size presumably follow the
    # H-optimus-1 model card -- confirm.
    model_path = f"hoptimus1_lora_fold_{i + 1}_best.pth"
    base_model = timm.create_model(
        "hf-hub:bioptimus/H-optimus-1",
        pretrained=True,
        num_classes=0,
        init_values=1e-5,
        dynamic_img_size=False
    )
    base_model.head = nn.Linear(base_model.num_features, 1)
    base_model = base_model.to(device)

    model = get_peft_model(base_model, lora_config)
    # weights_only=True restricts unpickling to tensors and plain containers,
    # which is all a state dict needs; it avoids executing arbitrary pickled
    # code and silences the torch.load FutureWarning.
    state_dict = torch.load(model_path, map_location=device, weights_only=True)
    model.load_state_dict(state_dict)
    model.eval()

    fold_probs = []

    with torch.inference_mode(), torch.autocast(
        device_type=device.type, dtype=amp_dtype, enabled=use_amp
    ):
        for images_batch, _ in tqdm(test_loader, desc=f"Evaluating Fold {i + 1}"):
            images_batch = images_batch.to(device, non_blocking=True)
            logits = model(images_batch)
            # Up-cast before the sigmoid: in float16 the probabilities are
            # coarsely quantised near 0.5, which creates ties for AUROC and
            # can flip predictions at the 0.5 threshold.
            probs = torch.sigmoid(logits.float()).squeeze(1).cpu().numpy()
            fold_probs.extend(probs)

    fold_probs = np.array(fold_probs)
    # Probabilities above 0.5 are assigned label 1, everything else label 0.
    fold_preds = (fold_probs > 0.5).astype(int)

    bal_acc = balanced_accuracy_score(true_labels, fold_preds)
    auroc = roc_auc_score(true_labels, fold_probs)

    fold_bal_accs.append(bal_acc)
    fold_aurocs.append(auroc)

    print(f"Fold {i + 1} - Balanced Accuracy: {bal_acc:.4f}, AUROC: {auroc:.4f}")

    fold_probs_dict[f"fold_{i + 1}"] = {
        "probs": fold_probs,
        "preds": fold_preds,
        "true_labels": true_labels
    }

    # Clean up GPU memory
    # Drop both references so the weights can be freed before the next fold.
    del model
    del base_model
    torch.cuda.empty_cache()

# Summary
# Aggregate the per-fold metrics; np.std is called with its default ddof=0,
# i.e. the population standard deviation over the folds.
mean_bal_acc, std_bal_acc = np.mean(fold_bal_accs), np.std(fold_bal_accs)
mean_auroc, std_auroc = np.mean(fold_aurocs), np.std(fold_aurocs)

summary_rows = (
    ("Balanced Accuracy", mean_bal_acc, std_bal_acc),
    ("AUROC", mean_auroc, std_auroc),
)

print("\n--- Final Evaluation Summary (Hoptimus-1 LoRA) ---")
for metric_name, metric_mean, metric_std in summary_rows:
    print(f"{metric_name}: {metric_mean:.4f} ± {metric_std:.4f}")

# Save results
# One entry per fold ("fold_1", ...), each holding probs, preds and true_labels.
output_path = "hoptimus1_atnorm-md_test_predictions.pkl"
with open(output_path, "wb") as pkl_file:
    pickle.dump(fold_probs_dict, pkl_file)

print(f"Saved predictions to: {output_path}")



--- Inference: Fold 1 ---


  model.load_state_dict(torch.load(model_path, map_location=device))
Evaluating Fold 1: 100%|██████████| 527/527 [00:20<00:00, 25.55it/s]


Fold 1 - Balanced Accuracy: 0.7350, AUROC: 0.8258

--- Inference: Fold 2 ---


  model.load_state_dict(torch.load(model_path, map_location=device))
Evaluating Fold 2: 100%|██████████| 527/527 [00:20<00:00, 25.61it/s]


Fold 2 - Balanced Accuracy: 0.7409, AUROC: 0.8332

--- Inference: Fold 3 ---


  model.load_state_dict(torch.load(model_path, map_location=device))
Evaluating Fold 3: 100%|██████████| 527/527 [00:20<00:00, 25.56it/s]


Fold 3 - Balanced Accuracy: 0.7384, AUROC: 0.8366

--- Inference: Fold 4 ---


  model.load_state_dict(torch.load(model_path, map_location=device))
Evaluating Fold 4: 100%|██████████| 527/527 [00:22<00:00, 22.98it/s]


Fold 4 - Balanced Accuracy: 0.7102, AUROC: 0.8569

--- Inference: Fold 5 ---


  model.load_state_dict(torch.load(model_path, map_location=device))
Evaluating Fold 5: 100%|██████████| 527/527 [00:27<00:00, 19.10it/s]


Fold 5 - Balanced Accuracy: 0.7734, AUROC: 0.8421

--- Final Evaluation Summary (Hoptimus-1 LoRA) ---
Balanced Accuracy: 0.7396 ± 0.0202
AUROC: 0.8389 ± 0.0104
Saved predictions to: hoptimus1_atnorm-md_test_predictions.pkl
