## AMi-Br

In [None]:
import os
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from tqdm import tqdm
import numpy as np
import pandas as pd
from sklearn.metrics import balanced_accuracy_score, roc_auc_score
from huggingface_hub import login
from timm import create_model
from timm.layers import SwiGLUPacked
from torchvision import transforms
from peft import LoraConfig, get_peft_model
import pickle

# Device: use the GPU when one is visible to PyTorch, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hugging Face login
# Prefer a token supplied through the HF_TOKEN environment variable so that a
# real credential never has to be pasted into (and committed with) this
# notebook. When the variable is unset, fall back to the original placeholder
# string, which keeps the previous behavior unchanged.
login(token=os.environ.get("HF_TOKEN", "Your HuggingFace Token Here"))

# LoRA config (must match training): the adapter layout built here has to
# line up with the keys stored in the per-fold checkpoints loaded later.
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["qkv", "proj", "fc1", "fc2"],
    lora_dropout=0.3,
    bias="none",
    modules_to_save=["head"]
)

# Transform (same as val_transform in training): deterministic resize/crop
# to 224, tensor conversion, then per-channel normalization with mean 0.5 and
# std 0.5.
val_transform = transforms.Compose([
    transforms.Resize(224),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
])

# Dataset
class InferenceDataset(Dataset):
    """Map-style dataset pairing image files with numeric labels.

    Args:
        image_paths: Sequence of image file paths that PIL can open.
        labels: Sequence of numeric labels aligned index-by-index with
            ``image_paths``.
        transform: Callable applied to each RGB ``PIL.Image``; whatever it
            returns is yielded as the image.

    Raises:
        ValueError: If ``image_paths`` and ``labels`` differ in length.
    """

    def __init__(self, image_paths, labels, transform):
        # A length mismatch would otherwise surface only as an IndexError
        # (or silently ignored labels) deep inside a DataLoader worker.
        if len(image_paths) != len(labels):
            raise ValueError(
                f"image_paths ({len(image_paths)}) and labels ({len(labels)}) "
                "must have the same length"
            )
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of samples."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(transformed_image, label)`` for sample ``idx``.

        The label is a float32 tensor of shape ``(1,)``.
        """
        # The context manager closes the underlying file handle promptly;
        # convert() returns an independent in-memory image, so it remains
        # usable after the file is closed.
        with Image.open(self.image_paths[idx]) as img:
            image = img.convert("RGB")
        image = self.transform(image)
        label = torch.tensor(self.labels[idx], dtype=torch.float32).unsqueeze(0)
        return image, label

# Load test images
# Each class lives in its own sub-folder of test_root; the folder name is
# mapped to the integer label used for the metrics below.
test_root = "/data/MELBA-AmiBr/Datasets_Stratified/AMi-Br/Test"
class_map = {"Atypical": 0, "Normal": 1}
image_paths, labels = [], []

for class_name, label_val in class_map.items():
    folder = os.path.join(test_root, class_name)
    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order (and therefore the saved per-sample predictions) reproducible
    # across runs and machines.
    for fname in sorted(os.listdir(folder)):
        if fname.lower().endswith(('.jpg', '.jpeg', '.png', '.tif')):
            image_paths.append(os.path.join(folder, fname))
            labels.append(label_val)

# Fail early with a clear message instead of an obscure metric error later.
if not image_paths:
    raise FileNotFoundError(f"No test images found under: {test_root}")

# Dataloader (shuffle=False keeps predictions aligned with `labels`)
test_dataset = InferenceDataset(image_paths, labels, val_transform)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=8, pin_memory=True)

# Load models
# One fine-tuned checkpoint per cross-validation fold; every fold model is
# rebuilt from the pretrained backbone and then overwritten with its saved
# weights.
num_folds = 5
model_paths = [f"virchow_lora_fold_{i + 1}_best.pth" for i in range(num_folds)]
models = []

for path in model_paths:
    base_model = create_model(
        "hf-hub:paige-ai/Virchow",
        pretrained=True,
        mlp_layer=SwiGLUPacked,
        act_layer=nn.SiLU
    )
    # Replace the classifier with a single-logit head (binary task).
    base_model.reset_classifier(num_classes=1)
    base_model = base_model.to(device)

    # Wrap with LoRA adapters so the module/key layout matches the checkpoint.
    model = get_peft_model(base_model, lora_config)
    # weights_only=True restricts unpickling to tensors and plain containers,
    # which is all a state dict needs; it avoids executing arbitrary pickled
    # code and silences the torch.load FutureWarning.
    state_dict = torch.load(path, map_location=device, weights_only=True)
    model.load_state_dict(state_dict)
    model.eval()
    models.append(model)

# Inference
# Every fold model is evaluated independently on the full test set; per-fold
# probabilities, thresholded predictions and the ground truth are collected
# in fold_probs_dict.
true_labels = np.array(labels)
fold_bal_accs, fold_aurocs = [], []
fold_probs_dict = {}

for i, model in enumerate(models):
    fold_probs = []

    with torch.no_grad():
        for images, _ in tqdm(test_loader, desc=f"Inference Fold {i + 1}"):
            images = images.to(device)
            outputs = model(images)

            # 3-D output: keep index 0 along dim 1 (presumably the class
            # token of a token-wise output -- confirm against the model card).
            if outputs.ndim == 3:
                outputs = outputs[:, 0]
            if outputs.ndim == 1:
                outputs = outputs.unsqueeze(1)

            # The model must yield exactly one logit per image. Previously a
            # wider output was pushed through a freshly created, randomly
            # initialised nn.Linear, which silently produced meaningless
            # probabilities; fail loudly instead.
            if outputs.ndim != 2 or outputs.size(1) != 1:
                raise RuntimeError(
                    "Expected one logit per image, got output of shape "
                    f"{tuple(outputs.shape)} for fold {i + 1}"
                )

            # Sigmoid turns the logit into the probability of label 1.
            probs = torch.sigmoid(outputs).squeeze(1).cpu().numpy()
            fold_probs.extend(probs)

    fold_probs = np.array(fold_probs)
    # Probabilities above 0.5 are assigned label 1, everything else label 0.
    fold_preds = (fold_probs > 0.5).astype(int)

    bal_acc = balanced_accuracy_score(true_labels, fold_preds)
    auroc = roc_auc_score(true_labels, fold_probs)

    fold_bal_accs.append(bal_acc)
    fold_aurocs.append(auroc)

    print(f"\nFold {i + 1} - Balanced Accuracy: {bal_acc:.4f}, AUROC: {auroc:.4f}")

    fold_probs_dict[f"fold_{i + 1}"] = {
        "probs": fold_probs,
        "preds": fold_preds,
        "true_labels": true_labels
    }

# Summary: mean and (population) standard deviation of each metric over folds.
mean_bal_acc, std_bal_acc = np.mean(fold_bal_accs), np.std(fold_bal_accs)
mean_auroc, std_auroc = np.mean(fold_aurocs), np.std(fold_aurocs)

summary_lines = [
    "\n--- Per-Fold Evaluation Summary (Virchow LoRA) ---",
    f"Balanced Accuracy: {mean_bal_acc:.4f} ± {std_bal_acc:.4f}",
    f"AUROC: {mean_auroc:.4f} ± {std_auroc:.4f}",
]
print("\n".join(summary_lines))

# Save predictions: pickle the per-fold dict (probs, preds, true labels).
output_path = "virchow_amibr_test_predictions.pkl"
with open(output_path, "wb") as out_file:
    pickle.dump(fold_probs_dict, out_file)

print(f"\nSaved fold predictions and labels to: {output_path}")


  model.load_state_dict(torch.load(path, map_location=device))
Inference Fold 1: 100%|██████████| 52/52 [00:11<00:00,  4.56it/s]



Fold 1 - Balanced Accuracy: 0.7580, AUROC: 0.8761


Inference Fold 2: 100%|██████████| 52/52 [00:11<00:00,  4.72it/s]



Fold 2 - Balanced Accuracy: 0.7704, AUROC: 0.8722


Inference Fold 3: 100%|██████████| 52/52 [00:10<00:00,  4.89it/s]



Fold 3 - Balanced Accuracy: 0.7895, AUROC: 0.9075


Inference Fold 4: 100%|██████████| 52/52 [00:10<00:00,  4.75it/s]



Fold 4 - Balanced Accuracy: 0.8316, AUROC: 0.9063


Inference Fold 5: 100%|██████████| 52/52 [00:11<00:00,  4.62it/s]


Fold 5 - Balanced Accuracy: 0.7894, AUROC: 0.8832

--- Per-Fold Evaluation Summary (Virchow LoRA) ---
Balanced Accuracy: 0.7878 ± 0.0250
AUROC: 0.8891 ± 0.0150

Saved fold predictions and labels to: virchow_amibr_test_predictions.pkl





## AtNorM-Br

In [None]:
import os
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from tqdm import tqdm
import numpy as np
import pandas as pd
from sklearn.metrics import balanced_accuracy_score, roc_auc_score
from huggingface_hub import login
from timm import create_model
from timm.layers import SwiGLUPacked
from torchvision import transforms
from peft import LoraConfig, get_peft_model
import pickle

# Device: use the GPU when one is visible to PyTorch, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hugging Face login
# Prefer a token supplied through the HF_TOKEN environment variable so that a
# real credential never has to be pasted into (and committed with) this
# notebook. When the variable is unset, fall back to the original placeholder
# string, which keeps the previous behavior unchanged.
login(token=os.environ.get("HF_TOKEN", "Your HuggingFace Token Here"))

# LoRA config (must match training): the adapter layout built here has to
# line up with the keys stored in the per-fold checkpoints loaded later.
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["qkv", "proj", "fc1", "fc2"],
    lora_dropout=0.3,
    bias="none",
    modules_to_save=["head"]
)

# Transform (same as val_transform in training): deterministic resize/crop
# to 224, tensor conversion, then per-channel normalization with mean 0.5 and
# std 0.5.
val_transform = transforms.Compose([
    transforms.Resize(224),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
])

# Dataset
class InferenceDataset(Dataset):
    """Map-style dataset pairing image files with numeric labels.

    Args:
        image_paths: Sequence of image file paths that PIL can open.
        labels: Sequence of numeric labels aligned index-by-index with
            ``image_paths``.
        transform: Callable applied to each RGB ``PIL.Image``; whatever it
            returns is yielded as the image.

    Raises:
        ValueError: If ``image_paths`` and ``labels`` differ in length.
    """

    def __init__(self, image_paths, labels, transform):
        # A length mismatch would otherwise surface only as an IndexError
        # (or silently ignored labels) deep inside a DataLoader worker.
        if len(image_paths) != len(labels):
            raise ValueError(
                f"image_paths ({len(image_paths)}) and labels ({len(labels)}) "
                "must have the same length"
            )
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of samples."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(transformed_image, label)`` for sample ``idx``.

        The label is a float32 tensor of shape ``(1,)``.
        """
        # The context manager closes the underlying file handle promptly;
        # convert() returns an independent in-memory image, so it remains
        # usable after the file is closed.
        with Image.open(self.image_paths[idx]) as img:
            image = img.convert("RGB")
        image = self.transform(image)
        label = torch.tensor(self.labels[idx], dtype=torch.float32).unsqueeze(0)
        return image, label

# Load test images
# Each class lives in its own sub-folder of test_root; the folder name is
# mapped to the integer label used for the metrics below.
test_root = "/data/MELBA-AmiBr/Datasets_Stratified/AtNorM-Br"
class_map = {"Atypical": 0, "Normal": 1}
image_paths, labels = [], []

for class_name, label_val in class_map.items():
    folder = os.path.join(test_root, class_name)
    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order (and therefore the saved per-sample predictions) reproducible
    # across runs and machines.
    for fname in sorted(os.listdir(folder)):
        if fname.lower().endswith(('.jpg', '.jpeg', '.png', '.tif')):
            image_paths.append(os.path.join(folder, fname))
            labels.append(label_val)

# Fail early with a clear message instead of an obscure metric error later.
if not image_paths:
    raise FileNotFoundError(f"No test images found under: {test_root}")

# Dataloader (shuffle=False keeps predictions aligned with `labels`)
test_dataset = InferenceDataset(image_paths, labels, val_transform)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=8, pin_memory=True)

# Load models
# One fine-tuned checkpoint per cross-validation fold; every fold model is
# rebuilt from the pretrained backbone and then overwritten with its saved
# weights.
num_folds = 5
model_paths = [f"virchow_lora_fold_{i + 1}_best.pth" for i in range(num_folds)]
models = []

for path in model_paths:
    base_model = create_model(
        "hf-hub:paige-ai/Virchow",
        pretrained=True,
        mlp_layer=SwiGLUPacked,
        act_layer=nn.SiLU
    )
    # Replace the classifier with a single-logit head (binary task).
    base_model.reset_classifier(num_classes=1)
    base_model = base_model.to(device)

    # Wrap with LoRA adapters so the module/key layout matches the checkpoint.
    model = get_peft_model(base_model, lora_config)
    # weights_only=True restricts unpickling to tensors and plain containers,
    # which is all a state dict needs; it avoids executing arbitrary pickled
    # code and silences the torch.load FutureWarning.
    state_dict = torch.load(path, map_location=device, weights_only=True)
    model.load_state_dict(state_dict)
    model.eval()
    models.append(model)

# Inference
# Every fold model is evaluated independently on the full test set; per-fold
# probabilities, thresholded predictions and the ground truth are collected
# in fold_probs_dict.
true_labels = np.array(labels)
fold_bal_accs, fold_aurocs = [], []
fold_probs_dict = {}

for i, model in enumerate(models):
    fold_probs = []

    with torch.no_grad():
        for images, _ in tqdm(test_loader, desc=f"Inference Fold {i + 1}"):
            images = images.to(device)
            outputs = model(images)

            # 3-D output: keep index 0 along dim 1 (presumably the class
            # token of a token-wise output -- confirm against the model card).
            if outputs.ndim == 3:
                outputs = outputs[:, 0]
            if outputs.ndim == 1:
                outputs = outputs.unsqueeze(1)

            # The model must yield exactly one logit per image. Previously a
            # wider output was pushed through a freshly created, randomly
            # initialised nn.Linear, which silently produced meaningless
            # probabilities; fail loudly instead.
            if outputs.ndim != 2 or outputs.size(1) != 1:
                raise RuntimeError(
                    "Expected one logit per image, got output of shape "
                    f"{tuple(outputs.shape)} for fold {i + 1}"
                )

            # Sigmoid turns the logit into the probability of label 1.
            probs = torch.sigmoid(outputs).squeeze(1).cpu().numpy()
            fold_probs.extend(probs)

    fold_probs = np.array(fold_probs)
    # Probabilities above 0.5 are assigned label 1, everything else label 0.
    fold_preds = (fold_probs > 0.5).astype(int)

    bal_acc = balanced_accuracy_score(true_labels, fold_preds)
    auroc = roc_auc_score(true_labels, fold_probs)

    fold_bal_accs.append(bal_acc)
    fold_aurocs.append(auroc)

    print(f"\nFold {i + 1} - Balanced Accuracy: {bal_acc:.4f}, AUROC: {auroc:.4f}")

    fold_probs_dict[f"fold_{i + 1}"] = {
        "probs": fold_probs,
        "preds": fold_preds,
        "true_labels": true_labels
    }

# Summary: mean and (population) standard deviation of each metric over folds.
mean_bal_acc, std_bal_acc = np.mean(fold_bal_accs), np.std(fold_bal_accs)
mean_auroc, std_auroc = np.mean(fold_aurocs), np.std(fold_aurocs)

summary_lines = [
    "\n--- Per-Fold Evaluation Summary (Virchow LoRA) ---",
    f"Balanced Accuracy: {mean_bal_acc:.4f} ± {std_bal_acc:.4f}",
    f"AUROC: {mean_auroc:.4f} ± {std_auroc:.4f}",
]
print("\n".join(summary_lines))

# Save predictions: pickle the per-fold dict (probs, preds, true labels).
output_path = "virchow_atnorm-br_test_predictions.pkl"
with open(output_path, "wb") as out_file:
    pickle.dump(fold_probs_dict, out_file)

print(f"\nSaved fold predictions and labels to: {output_path}")


  model.load_state_dict(torch.load(path, map_location=device))
Inference Fold 1: 100%|██████████| 47/47 [00:09<00:00,  4.83it/s]



Fold 1 - Balanced Accuracy: 0.7552, AUROC: 0.8343


Inference Fold 2: 100%|██████████| 47/47 [00:09<00:00,  4.87it/s]



Fold 2 - Balanced Accuracy: 0.7426, AUROC: 0.8270


Inference Fold 3: 100%|██████████| 47/47 [00:09<00:00,  4.88it/s]



Fold 3 - Balanced Accuracy: 0.7928, AUROC: 0.8794


Inference Fold 4: 100%|██████████| 47/47 [00:09<00:00,  4.86it/s]



Fold 4 - Balanced Accuracy: 0.7912, AUROC: 0.8673


Inference Fold 5: 100%|██████████| 47/47 [00:09<00:00,  4.78it/s]


Fold 5 - Balanced Accuracy: 0.7662, AUROC: 0.8619

--- Per-Fold Evaluation Summary (Virchow LoRA) ---
Balanced Accuracy: 0.7696 ± 0.0198
AUROC: 0.8540 ± 0.0200

Saved fold predictions and labels to: virchow_atnorm-br_test_predictions.pkl





## AtNorM-MD

In [None]:
import os
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from tqdm import tqdm
import numpy as np
import pandas as pd
from sklearn.metrics import balanced_accuracy_score, roc_auc_score
from huggingface_hub import login
from timm import create_model
from timm.layers import SwiGLUPacked
from torchvision import transforms
from peft import LoraConfig, get_peft_model
import pickle

# Device: use the GPU when one is visible to PyTorch, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hugging Face login
# Prefer a token supplied through the HF_TOKEN environment variable so that a
# real credential never has to be pasted into (and committed with) this
# notebook. When the variable is unset, fall back to the original placeholder
# string, which keeps the previous behavior unchanged.
login(token=os.environ.get("HF_TOKEN", "Your HuggingFace Token Here"))

# LoRA config (must match training): the adapter layout built here has to
# line up with the keys stored in the per-fold checkpoints loaded later.
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["qkv", "proj", "fc1", "fc2"],
    lora_dropout=0.3,
    bias="none",
    modules_to_save=["head"]
)

# Transform (same as val_transform in training): deterministic resize/crop
# to 224, tensor conversion, then per-channel normalization with mean 0.5 and
# std 0.5.
val_transform = transforms.Compose([
    transforms.Resize(224),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
])

# Dataset
class InferenceDataset(Dataset):
    """Map-style dataset pairing image files with numeric labels.

    Args:
        image_paths: Sequence of image file paths that PIL can open.
        labels: Sequence of numeric labels aligned index-by-index with
            ``image_paths``.
        transform: Callable applied to each RGB ``PIL.Image``; whatever it
            returns is yielded as the image.

    Raises:
        ValueError: If ``image_paths`` and ``labels`` differ in length.
    """

    def __init__(self, image_paths, labels, transform):
        # A length mismatch would otherwise surface only as an IndexError
        # (or silently ignored labels) deep inside a DataLoader worker.
        if len(image_paths) != len(labels):
            raise ValueError(
                f"image_paths ({len(image_paths)}) and labels ({len(labels)}) "
                "must have the same length"
            )
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of samples."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(transformed_image, label)`` for sample ``idx``.

        The label is a float32 tensor of shape ``(1,)``.
        """
        # The context manager closes the underlying file handle promptly;
        # convert() returns an independent in-memory image, so it remains
        # usable after the file is closed.
        with Image.open(self.image_paths[idx]) as img:
            image = img.convert("RGB")
        image = self.transform(image)
        label = torch.tensor(self.labels[idx], dtype=torch.float32).unsqueeze(0)
        return image, label

# Load test images
# Each class lives in its own sub-folder of test_root; the folder name is
# mapped to the integer label used for the metrics below.
test_root = "/data/MELBA-AmiBr/Datasets_Stratified/AtNorM-MD"
class_map = {"Atypical": 0, "Normal": 1}
image_paths, labels = [], []

for class_name, label_val in class_map.items():
    folder = os.path.join(test_root, class_name)
    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order (and therefore the saved per-sample predictions) reproducible
    # across runs and machines.
    for fname in sorted(os.listdir(folder)):
        if fname.lower().endswith(('.jpg', '.jpeg', '.png', '.tif')):
            image_paths.append(os.path.join(folder, fname))
            labels.append(label_val)

# Fail early with a clear message instead of an obscure metric error later.
if not image_paths:
    raise FileNotFoundError(f"No test images found under: {test_root}")

# Dataloader (shuffle=False keeps predictions aligned with `labels`)
test_dataset = InferenceDataset(image_paths, labels, val_transform)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=8, pin_memory=True)

# Load models
# One fine-tuned checkpoint per cross-validation fold; every fold model is
# rebuilt from the pretrained backbone and then overwritten with its saved
# weights.
num_folds = 5
model_paths = [f"virchow_lora_fold_{i + 1}_best.pth" for i in range(num_folds)]
models = []

for path in model_paths:
    base_model = create_model(
        "hf-hub:paige-ai/Virchow",
        pretrained=True,
        mlp_layer=SwiGLUPacked,
        act_layer=nn.SiLU
    )
    # Replace the classifier with a single-logit head (binary task).
    base_model.reset_classifier(num_classes=1)
    base_model = base_model.to(device)

    # Wrap with LoRA adapters so the module/key layout matches the checkpoint.
    model = get_peft_model(base_model, lora_config)
    # weights_only=True restricts unpickling to tensors and plain containers,
    # which is all a state dict needs; it avoids executing arbitrary pickled
    # code and silences the torch.load FutureWarning.
    state_dict = torch.load(path, map_location=device, weights_only=True)
    model.load_state_dict(state_dict)
    model.eval()
    models.append(model)

# Inference
# Every fold model is evaluated independently on the full test set; per-fold
# probabilities, thresholded predictions and the ground truth are collected
# in fold_probs_dict.
true_labels = np.array(labels)
fold_bal_accs, fold_aurocs = [], []
fold_probs_dict = {}

for i, model in enumerate(models):
    fold_probs = []

    with torch.no_grad():
        for images, _ in tqdm(test_loader, desc=f"Inference Fold {i + 1}"):
            images = images.to(device)
            outputs = model(images)

            # 3-D output: keep index 0 along dim 1 (presumably the class
            # token of a token-wise output -- confirm against the model card).
            if outputs.ndim == 3:
                outputs = outputs[:, 0]
            if outputs.ndim == 1:
                outputs = outputs.unsqueeze(1)

            # The model must yield exactly one logit per image. Previously a
            # wider output was pushed through a freshly created, randomly
            # initialised nn.Linear, which silently produced meaningless
            # probabilities; fail loudly instead.
            if outputs.ndim != 2 or outputs.size(1) != 1:
                raise RuntimeError(
                    "Expected one logit per image, got output of shape "
                    f"{tuple(outputs.shape)} for fold {i + 1}"
                )

            # Sigmoid turns the logit into the probability of label 1.
            probs = torch.sigmoid(outputs).squeeze(1).cpu().numpy()
            fold_probs.extend(probs)

    fold_probs = np.array(fold_probs)
    # Probabilities above 0.5 are assigned label 1, everything else label 0.
    fold_preds = (fold_probs > 0.5).astype(int)

    bal_acc = balanced_accuracy_score(true_labels, fold_preds)
    auroc = roc_auc_score(true_labels, fold_probs)

    fold_bal_accs.append(bal_acc)
    fold_aurocs.append(auroc)

    print(f"\nFold {i + 1} - Balanced Accuracy: {bal_acc:.4f}, AUROC: {auroc:.4f}")

    fold_probs_dict[f"fold_{i + 1}"] = {
        "probs": fold_probs,
        "preds": fold_preds,
        "true_labels": true_labels
    }

# Summary: mean and (population) standard deviation of each metric over folds.
mean_bal_acc, std_bal_acc = np.mean(fold_bal_accs), np.std(fold_bal_accs)
mean_auroc, std_auroc = np.mean(fold_aurocs), np.std(fold_aurocs)

summary_lines = [
    "\n--- Per-Fold Evaluation Summary (Virchow LoRA) ---",
    f"Balanced Accuracy: {mean_bal_acc:.4f} ± {std_bal_acc:.4f}",
    f"AUROC: {mean_auroc:.4f} ± {std_auroc:.4f}",
]
print("\n".join(summary_lines))

# Save predictions: pickle the per-fold dict (probs, preds, true labels).
output_path = "virchow_atnorm-md_test_predictions.pkl"
with open(output_path, "wb") as out_file:
    pickle.dump(fold_probs_dict, out_file)

print(f"\nSaved fold predictions and labels to: {output_path}")


  model.load_state_dict(torch.load(path, map_location=device))
Inference Fold 1: 100%|██████████| 132/132 [00:27<00:00,  4.81it/s]



Fold 1 - Balanced Accuracy: 0.7567, AUROC: 0.8412


Inference Fold 2: 100%|██████████| 132/132 [00:28<00:00,  4.71it/s]



Fold 2 - Balanced Accuracy: 0.7304, AUROC: 0.8288


Inference Fold 3: 100%|██████████| 132/132 [00:28<00:00,  4.70it/s]



Fold 3 - Balanced Accuracy: 0.7623, AUROC: 0.8924


Inference Fold 4: 100%|██████████| 132/132 [00:28<00:00,  4.67it/s]



Fold 4 - Balanced Accuracy: 0.8142, AUROC: 0.8832


Inference Fold 5: 100%|██████████| 132/132 [00:28<00:00,  4.65it/s]


Fold 5 - Balanced Accuracy: 0.7887, AUROC: 0.8749

--- Per-Fold Evaluation Summary (Virchow LoRA) ---
Balanced Accuracy: 0.7705 ± 0.0287
AUROC: 0.8641 ± 0.0247

Saved fold predictions and labels to: virchow_atnorm-md_test_predictions.pkl



