## AMi-Br

In [None]:
import os
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from tqdm import tqdm
import numpy as np
import pandas as pd
from sklearn.metrics import balanced_accuracy_score, roc_auc_score
from huggingface_hub import login
from timm import create_model
from timm.data import resolve_data_config
from timm.data.transforms_factory import create_transform
from peft import get_peft_model, LoraConfig
import timm
import pickle

# Pick the compute device: CUDA when PyTorch can see a GPU, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Authenticate with the Hugging Face Hub before any "hf-hub:" model is created.
# The literal below is a placeholder and must be replaced with a real token.
login(token="your_huggingface_token_here")

# LoRA adapter configuration. Per the original author's note these values are
# the same as in training (presumably they must match the saved fold
# checkpoints for the state dicts to load).
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.3,
    bias="none",
    target_modules=["qkv", "proj", "fc1", "fc2"],
    modules_to_save=["head"],
)

# Keyword arguments forwarded to timm's create_model (same as training, per
# the original author's note). num_classes=1 gives a single-logit head.
timm_kwargs = dict(
    img_size=224,
    patch_size=14,
    depth=24,
    num_heads=24,
    init_values=1e-5,
    embed_dim=1536,
    mlp_ratio=2.66667 * 2,
    num_classes=1,
    no_embed_class=True,
    mlp_layer=timm.layers.SwiGLUPacked,
    act_layer=nn.SiLU,
    reg_tokens=8,
    dynamic_img_size=True,
)

# Dataset
class InferenceDataset(Dataset):
    """Map-style dataset that yields ``(transformed_image, label)`` pairs.

    Args:
        image_paths: Sequence of image file paths, index-aligned with ``labels``.
        labels: Sequence of numeric labels, one per entry of ``image_paths``.
        transform: Callable applied to the RGB ``PIL.Image`` before it is returned.
    """

    def __init__(self, image_paths, labels, transform):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of image paths held by the dataset."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load the image at ``idx``, convert it to RGB and apply the transform.

        Returns:
            A tuple ``(image, label)`` where ``image`` is whatever ``transform``
            returns and ``label`` is a float32 tensor with a leading dimension
            of size 1 (shape ``(1,)`` for scalar labels).
        """
        # Open inside a context manager so the underlying file handle is closed
        # deterministically instead of waiting for garbage collection; convert()
        # returns a new in-memory image that stays valid after the file closes.
        with Image.open(self.image_paths[idx]) as img:
            image = img.convert("RGB")
        image = self.transform(image)
        label = torch.tensor(self.labels[idx], dtype=torch.float32).unsqueeze(0)
        return image, label

# Load test data: one sub-folder per class under test_root, folder name -> label.
test_root = "/data/MELBA-AmiBr/Datasets_Stratified/AMi-Br/Test"
class_map = {"Atypical": 0, "Normal": 1}
image_paths, labels = [], []

for class_name, label_val in class_map.items():
    folder = os.path.join(test_root, class_name)
    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order of the saved predictions reproducible across runs and machines.
    for fname in sorted(os.listdir(folder)):
        if fname.lower().endswith(('.jpg', '.jpeg', '.png', '.tif')):
            image_paths.append(os.path.join(folder, fname))
            labels.append(label_val)

# Fail early with a clear message instead of an obscure metric error later.
if not image_paths:
    raise RuntimeError(f"No test images found under: {test_root}")

# Create transforms using model config (the model is only used to resolve the
# preprocessing settings from its pretrained_cfg).
dummy_model = create_model("hf-hub:MahmoodLab/UNI2-h", pretrained=True, **timm_kwargs)
val_transform = create_transform(**resolve_data_config(dummy_model.pretrained_cfg, model=dummy_model))

# Prepare test set; shuffle=False keeps predictions aligned with `labels`.
test_dataset = InferenceDataset(image_paths, labels, val_transform)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=8, pin_memory=True)

# Load trained fold models
num_folds = 5
model_paths = [f"uni2h_lora_fold_{i + 1}_best.pth" for i in range(num_folds)]
models = []

for path in model_paths:
    base_model = create_model("hf-hub:MahmoodLab/UNI2-h", pretrained=True, **timm_kwargs).to(device)
    model = get_peft_model(base_model, lora_config)
    # weights_only=True restricts unpickling to tensors and plain containers,
    # which is all a state dict needs, and avoids executing arbitrary pickled
    # code from the checkpoint file.
    model.load_state_dict(torch.load(path, map_location=device, weights_only=True))
    model.eval()
    models.append(model)

# Inference and evaluation: every fold model is scored on the same test set.
true_labels = np.array(labels)
fold_bal_accs, fold_aurocs = [], []
fold_probs_dict = {}

for i, model in enumerate(models):
    fold_probs = []

    with torch.no_grad():
        for images, _ in tqdm(test_loader, desc=f"Inference Fold {i + 1}"):
            images = images.to(device)
            outputs = model(images)

            # Normalise the output to shape (batch, 1): add the column if the
            # model returned a 1-D tensor, keep only the first column otherwise.
            if outputs.ndim == 1:
                outputs = outputs.unsqueeze(1)
            elif outputs.ndim == 2 and outputs.size(1) != 1:
                outputs = outputs[:, :1]

            probs = torch.sigmoid(outputs).squeeze(1).cpu().numpy()
            fold_probs.extend(probs)

    fold_probs = np.array(fold_probs)
    # Sigmoid score above 0.5 -> predicted label 1 ("Normal" in class_map).
    fold_preds = (fold_probs > 0.5).astype(int)

    bal_acc = balanced_accuracy_score(true_labels, fold_preds)
    auroc = roc_auc_score(true_labels, fold_probs)

    fold_bal_accs.append(bal_acc)
    fold_aurocs.append(auroc)

    print(f"\nFold {i + 1} - Balanced Accuracy: {bal_acc:.4f}, AUROC: {auroc:.4f}")

    fold_probs_dict[f"fold_{i + 1}"] = {
        "probs": fold_probs,
        "preds": fold_preds,
        "true_labels": true_labels
    }

# Summary: mean and (population) standard deviation across the fold models.
mean_bal_acc = np.mean(fold_bal_accs)
std_bal_acc = np.std(fold_bal_accs)
mean_auroc = np.mean(fold_aurocs)
std_auroc = np.std(fold_aurocs)

print("\n--- Per-Fold Evaluation Summary (UNI2-h LoRA) ---")
print(f"Balanced Accuracy: {mean_bal_acc:.4f} ± {std_bal_acc:.4f}")
print(f"AUROC: {mean_auroc:.4f} ± {std_auroc:.4f}")

# Save predictions (per-fold probabilities, thresholded predictions, labels).
output_path = "uni2h_amibr_test_predictions.pkl"
with open(output_path, "wb") as f:
    pickle.dump(fold_probs_dict, f)

print(f"\nSaved fold predictions and labels to: {output_path}")


  model.load_state_dict(torch.load(path, map_location=device))
Inference Fold 1: 100%|██████████| 52/52 [00:12<00:00,  4.32it/s]



Fold 1 - Balanced Accuracy: 0.7176, AUROC: 0.8135


Inference Fold 2: 100%|██████████| 52/52 [00:11<00:00,  4.36it/s]



Fold 2 - Balanced Accuracy: 0.7021, AUROC: 0.7816


Inference Fold 3: 100%|██████████| 52/52 [00:11<00:00,  4.45it/s]



Fold 3 - Balanced Accuracy: 0.7025, AUROC: 0.8453


Inference Fold 4: 100%|██████████| 52/52 [00:11<00:00,  4.55it/s]



Fold 4 - Balanced Accuracy: 0.7118, AUROC: 0.8088


Inference Fold 5: 100%|██████████| 52/52 [00:11<00:00,  4.57it/s]


Fold 5 - Balanced Accuracy: 0.7351, AUROC: 0.8272

--- Per-Fold Evaluation Summary (UNI2-h LoRA) ---
Balanced Accuracy: 0.7138 ± 0.0121
AUROC: 0.8153 ± 0.0211

Saved fold predictions and labels to: uni2h_amibr_test_predictions.pkl





## AtNorM-Br

In [None]:
import os
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from tqdm import tqdm
import numpy as np
import pandas as pd
from sklearn.metrics import balanced_accuracy_score, roc_auc_score
from huggingface_hub import login
from timm import create_model
from timm.data import resolve_data_config
from timm.data.transforms_factory import create_transform
from peft import get_peft_model, LoraConfig
import timm
import pickle

# Pick the compute device: CUDA when PyTorch can see a GPU, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Authenticate with the Hugging Face Hub before any "hf-hub:" model is created.
# The literal below is a placeholder and must be replaced with a real token.
login(token="your_huggingface_token_here")

# LoRA adapter configuration. Per the original author's note these values are
# the same as in training (presumably they must match the saved fold
# checkpoints for the state dicts to load).
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.3,
    bias="none",
    target_modules=["qkv", "proj", "fc1", "fc2"],
    modules_to_save=["head"],
)

# Keyword arguments forwarded to timm's create_model (same as training, per
# the original author's note). num_classes=1 gives a single-logit head.
timm_kwargs = dict(
    img_size=224,
    patch_size=14,
    depth=24,
    num_heads=24,
    init_values=1e-5,
    embed_dim=1536,
    mlp_ratio=2.66667 * 2,
    num_classes=1,
    no_embed_class=True,
    mlp_layer=timm.layers.SwiGLUPacked,
    act_layer=nn.SiLU,
    reg_tokens=8,
    dynamic_img_size=True,
)

# Dataset
class InferenceDataset(Dataset):
    """Map-style dataset that yields ``(transformed_image, label)`` pairs.

    Args:
        image_paths: Sequence of image file paths, index-aligned with ``labels``.
        labels: Sequence of numeric labels, one per entry of ``image_paths``.
        transform: Callable applied to the RGB ``PIL.Image`` before it is returned.
    """

    def __init__(self, image_paths, labels, transform):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of image paths held by the dataset."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load the image at ``idx``, convert it to RGB and apply the transform.

        Returns:
            A tuple ``(image, label)`` where ``image`` is whatever ``transform``
            returns and ``label`` is a float32 tensor with a leading dimension
            of size 1 (shape ``(1,)`` for scalar labels).
        """
        # Open inside a context manager so the underlying file handle is closed
        # deterministically instead of waiting for garbage collection; convert()
        # returns a new in-memory image that stays valid after the file closes.
        with Image.open(self.image_paths[idx]) as img:
            image = img.convert("RGB")
        image = self.transform(image)
        label = torch.tensor(self.labels[idx], dtype=torch.float32).unsqueeze(0)
        return image, label

# Load test data: one sub-folder per class under test_root, folder name -> label.
test_root = "/data/MELBA-AmiBr/Datasets_Stratified/AtNorM-Br"
class_map = {"Atypical": 0, "Normal": 1}
image_paths, labels = [], []

for class_name, label_val in class_map.items():
    folder = os.path.join(test_root, class_name)
    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order of the saved predictions reproducible across runs and machines.
    for fname in sorted(os.listdir(folder)):
        if fname.lower().endswith(('.jpg', '.jpeg', '.png', '.tif')):
            image_paths.append(os.path.join(folder, fname))
            labels.append(label_val)

# Fail early with a clear message instead of an obscure metric error later.
if not image_paths:
    raise RuntimeError(f"No test images found under: {test_root}")

# Create transforms using model config (the model is only used to resolve the
# preprocessing settings from its pretrained_cfg).
dummy_model = create_model("hf-hub:MahmoodLab/UNI2-h", pretrained=True, **timm_kwargs)
val_transform = create_transform(**resolve_data_config(dummy_model.pretrained_cfg, model=dummy_model))

# Prepare test set; shuffle=False keeps predictions aligned with `labels`.
test_dataset = InferenceDataset(image_paths, labels, val_transform)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=8, pin_memory=True)

# Load trained fold models
num_folds = 5
model_paths = [f"uni2h_lora_fold_{i + 1}_best.pth" for i in range(num_folds)]
models = []

for path in model_paths:
    base_model = create_model("hf-hub:MahmoodLab/UNI2-h", pretrained=True, **timm_kwargs).to(device)
    model = get_peft_model(base_model, lora_config)
    # weights_only=True restricts unpickling to tensors and plain containers,
    # which is all a state dict needs, and avoids executing arbitrary pickled
    # code from the checkpoint file.
    model.load_state_dict(torch.load(path, map_location=device, weights_only=True))
    model.eval()
    models.append(model)

# Inference and evaluation: every fold model is scored on the same test set.
true_labels = np.array(labels)
fold_bal_accs, fold_aurocs = [], []
fold_probs_dict = {}

for i, model in enumerate(models):
    fold_probs = []

    with torch.no_grad():
        for images, _ in tqdm(test_loader, desc=f"Inference Fold {i + 1}"):
            images = images.to(device)
            outputs = model(images)

            # Normalise the output to shape (batch, 1): add the column if the
            # model returned a 1-D tensor, keep only the first column otherwise.
            if outputs.ndim == 1:
                outputs = outputs.unsqueeze(1)
            elif outputs.ndim == 2 and outputs.size(1) != 1:
                outputs = outputs[:, :1]

            probs = torch.sigmoid(outputs).squeeze(1).cpu().numpy()
            fold_probs.extend(probs)

    fold_probs = np.array(fold_probs)
    # Sigmoid score above 0.5 -> predicted label 1 ("Normal" in class_map).
    fold_preds = (fold_probs > 0.5).astype(int)

    bal_acc = balanced_accuracy_score(true_labels, fold_preds)
    auroc = roc_auc_score(true_labels, fold_probs)

    fold_bal_accs.append(bal_acc)
    fold_aurocs.append(auroc)

    print(f"\nFold {i + 1} - Balanced Accuracy: {bal_acc:.4f}, AUROC: {auroc:.4f}")

    fold_probs_dict[f"fold_{i + 1}"] = {
        "probs": fold_probs,
        "preds": fold_preds,
        "true_labels": true_labels
    }

# Summary: mean and (population) standard deviation across the fold models.
mean_bal_acc = np.mean(fold_bal_accs)
std_bal_acc = np.std(fold_bal_accs)
mean_auroc = np.mean(fold_aurocs)
std_auroc = np.std(fold_aurocs)

print("\n--- Per-Fold Evaluation Summary (UNI2-h LoRA) ---")
print(f"Balanced Accuracy: {mean_bal_acc:.4f} ± {std_bal_acc:.4f}")
print(f"AUROC: {mean_auroc:.4f} ± {std_auroc:.4f}")

# Save predictions (per-fold probabilities, thresholded predictions, labels).
output_path = "uni2h_atnorm-br_test_predictions.pkl"
with open(output_path, "wb") as f:
    pickle.dump(fold_probs_dict, f)

print(f"\nSaved fold predictions and labels to: {output_path}")


  model.load_state_dict(torch.load(path, map_location=device))
Inference Fold 1: 100%|██████████| 47/47 [00:10<00:00,  4.64it/s]



Fold 1 - Balanced Accuracy: 0.7087, AUROC: 0.8088


Inference Fold 2: 100%|██████████| 47/47 [00:10<00:00,  4.64it/s]



Fold 2 - Balanced Accuracy: 0.7226, AUROC: 0.8081


Inference Fold 3: 100%|██████████| 47/47 [00:10<00:00,  4.59it/s]



Fold 3 - Balanced Accuracy: 0.7508, AUROC: 0.8435


Inference Fold 4: 100%|██████████| 47/47 [00:10<00:00,  4.38it/s]



Fold 4 - Balanced Accuracy: 0.7436, AUROC: 0.8355


Inference Fold 5: 100%|██████████| 47/47 [00:10<00:00,  4.33it/s]


Fold 5 - Balanced Accuracy: 0.7249, AUROC: 0.8181

--- Per-Fold Evaluation Summary (UNI2-h LoRA) ---
Balanced Accuracy: 0.7301 ± 0.0152
AUROC: 0.8228 ± 0.0143

Saved fold predictions and labels to: uni2h_atnorm-br_test_predictions.pkl





## AtNorM-MD

In [None]:
import os
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from tqdm import tqdm
import numpy as np
import pandas as pd
from sklearn.metrics import balanced_accuracy_score, roc_auc_score
from huggingface_hub import login
from timm import create_model
from timm.data import resolve_data_config
from timm.data.transforms_factory import create_transform
from peft import get_peft_model, LoraConfig
import timm
import pickle

# Pick the compute device: CUDA when PyTorch can see a GPU, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Authenticate with the Hugging Face Hub before any "hf-hub:" model is created.
# The literal below is a placeholder and must be replaced with a real token.
login(token="your_huggingface_token_here")

# LoRA adapter configuration. Per the original author's note these values are
# the same as in training (presumably they must match the saved fold
# checkpoints for the state dicts to load).
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.3,
    bias="none",
    target_modules=["qkv", "proj", "fc1", "fc2"],
    modules_to_save=["head"],
)

# Keyword arguments forwarded to timm's create_model (same as training, per
# the original author's note). num_classes=1 gives a single-logit head.
timm_kwargs = dict(
    img_size=224,
    patch_size=14,
    depth=24,
    num_heads=24,
    init_values=1e-5,
    embed_dim=1536,
    mlp_ratio=2.66667 * 2,
    num_classes=1,
    no_embed_class=True,
    mlp_layer=timm.layers.SwiGLUPacked,
    act_layer=nn.SiLU,
    reg_tokens=8,
    dynamic_img_size=True,
)

# Dataset
class InferenceDataset(Dataset):
    """Map-style dataset that yields ``(transformed_image, label)`` pairs.

    Args:
        image_paths: Sequence of image file paths, index-aligned with ``labels``.
        labels: Sequence of numeric labels, one per entry of ``image_paths``.
        transform: Callable applied to the RGB ``PIL.Image`` before it is returned.
    """

    def __init__(self, image_paths, labels, transform):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of image paths held by the dataset."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load the image at ``idx``, convert it to RGB and apply the transform.

        Returns:
            A tuple ``(image, label)`` where ``image`` is whatever ``transform``
            returns and ``label`` is a float32 tensor with a leading dimension
            of size 1 (shape ``(1,)`` for scalar labels).
        """
        # Open inside a context manager so the underlying file handle is closed
        # deterministically instead of waiting for garbage collection; convert()
        # returns a new in-memory image that stays valid after the file closes.
        with Image.open(self.image_paths[idx]) as img:
            image = img.convert("RGB")
        image = self.transform(image)
        label = torch.tensor(self.labels[idx], dtype=torch.float32).unsqueeze(0)
        return image, label

# Load test data: one sub-folder per class under test_root, folder name -> label.
test_root = "/data/MELBA-AmiBr/Datasets_Stratified/AtNorM-MD"
class_map = {"Atypical": 0, "Normal": 1}
image_paths, labels = [], []

for class_name, label_val in class_map.items():
    folder = os.path.join(test_root, class_name)
    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order of the saved predictions reproducible across runs and machines.
    for fname in sorted(os.listdir(folder)):
        if fname.lower().endswith(('.jpg', '.jpeg', '.png', '.tif')):
            image_paths.append(os.path.join(folder, fname))
            labels.append(label_val)

# Fail early with a clear message instead of an obscure metric error later.
if not image_paths:
    raise RuntimeError(f"No test images found under: {test_root}")

# Create transforms using model config (the model is only used to resolve the
# preprocessing settings from its pretrained_cfg).
dummy_model = create_model("hf-hub:MahmoodLab/UNI2-h", pretrained=True, **timm_kwargs)
val_transform = create_transform(**resolve_data_config(dummy_model.pretrained_cfg, model=dummy_model))

# Prepare test set; shuffle=False keeps predictions aligned with `labels`.
test_dataset = InferenceDataset(image_paths, labels, val_transform)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=8, pin_memory=True)

# Load trained fold models
num_folds = 5
model_paths = [f"uni2h_lora_fold_{i + 1}_best.pth" for i in range(num_folds)]
models = []

for path in model_paths:
    base_model = create_model("hf-hub:MahmoodLab/UNI2-h", pretrained=True, **timm_kwargs).to(device)
    model = get_peft_model(base_model, lora_config)
    # weights_only=True restricts unpickling to tensors and plain containers,
    # which is all a state dict needs, and avoids executing arbitrary pickled
    # code from the checkpoint file.
    model.load_state_dict(torch.load(path, map_location=device, weights_only=True))
    model.eval()
    models.append(model)

# Inference and evaluation: every fold model is scored on the same test set.
true_labels = np.array(labels)
fold_bal_accs, fold_aurocs = [], []
fold_probs_dict = {}

for i, model in enumerate(models):
    fold_probs = []

    with torch.no_grad():
        for images, _ in tqdm(test_loader, desc=f"Inference Fold {i + 1}"):
            images = images.to(device)
            outputs = model(images)

            # Normalise the output to shape (batch, 1): add the column if the
            # model returned a 1-D tensor, keep only the first column otherwise.
            if outputs.ndim == 1:
                outputs = outputs.unsqueeze(1)
            elif outputs.ndim == 2 and outputs.size(1) != 1:
                outputs = outputs[:, :1]

            probs = torch.sigmoid(outputs).squeeze(1).cpu().numpy()
            fold_probs.extend(probs)

    fold_probs = np.array(fold_probs)
    # Sigmoid score above 0.5 -> predicted label 1 ("Normal" in class_map).
    fold_preds = (fold_probs > 0.5).astype(int)

    bal_acc = balanced_accuracy_score(true_labels, fold_preds)
    auroc = roc_auc_score(true_labels, fold_probs)

    fold_bal_accs.append(bal_acc)
    fold_aurocs.append(auroc)

    print(f"\nFold {i + 1} - Balanced Accuracy: {bal_acc:.4f}, AUROC: {auroc:.4f}")

    fold_probs_dict[f"fold_{i + 1}"] = {
        "probs": fold_probs,
        "preds": fold_preds,
        "true_labels": true_labels
    }

# Summary: mean and (population) standard deviation across the fold models.
mean_bal_acc = np.mean(fold_bal_accs)
std_bal_acc = np.std(fold_bal_accs)
mean_auroc = np.mean(fold_aurocs)
std_auroc = np.std(fold_aurocs)

print("\n--- Per-Fold Evaluation Summary (UNI2-h LoRA) ---")
print(f"Balanced Accuracy: {mean_bal_acc:.4f} ± {std_bal_acc:.4f}")
print(f"AUROC: {mean_auroc:.4f} ± {std_auroc:.4f}")

# Save predictions (per-fold probabilities, thresholded predictions, labels).
output_path = "uni2h_atnorm-md_test_predictions.pkl"
with open(output_path, "wb") as f:
    pickle.dump(fold_probs_dict, f)

print(f"\nSaved fold predictions and labels to: {output_path}")


  model.load_state_dict(torch.load(path, map_location=device))
Inference Fold 1: 100%|██████████| 132/132 [00:29<00:00,  4.54it/s]



Fold 1 - Balanced Accuracy: 0.6889, AUROC: 0.7751


Inference Fold 2: 100%|██████████| 132/132 [00:29<00:00,  4.53it/s]



Fold 2 - Balanced Accuracy: 0.6635, AUROC: 0.7237


Inference Fold 3: 100%|██████████| 132/132 [00:29<00:00,  4.54it/s]



Fold 3 - Balanced Accuracy: 0.6750, AUROC: 0.7590


Inference Fold 4: 100%|██████████| 132/132 [00:29<00:00,  4.53it/s]



Fold 4 - Balanced Accuracy: 0.6759, AUROC: 0.7176


Inference Fold 5: 100%|██████████| 132/132 [00:29<00:00,  4.52it/s]


Fold 5 - Balanced Accuracy: 0.7535, AUROC: 0.8327

--- Per-Fold Evaluation Summary (UNI2-h LoRA) ---
Balanced Accuracy: 0.6914 ± 0.0321
AUROC: 0.7616 ± 0.0415

Saved fold predictions and labels to: uni2h_atnorm-md_test_predictions.pkl



