In [2]:
import os
import itertools
import numpy as np
import pandas as pd
from PIL import Image

import torch
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms

from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import roc_auc_score

import timm

# Label vocabulary. The order fixes the column order of the multi-hot targets
# produced by MultiLabelBinarizer(classes=CLASSES) and is the order in which
# per-class AUROC scores are reported.
CLASSES = [
    "No Finding",
    "Atelectasis",
    "Cardiomegaly",
    "Effusion",
    "Infiltration",
    "Mass",
    "Nodule",
    "Pneumonia",
    "Pneumothorax",
    "Consolidation",
    "Edema",
    "Emphysema",
    "Fibrosis",
    "Pleural_Thickening",
    "Hernia",
]

class GammaCorrectionTransform:
    """Callable image transform that applies power-law (gamma) correction.

    The image is converted to a float array, divided by 255, raised to the
    power ``gamma``, multiplied back by 255, clipped to [0, 255] and returned
    as a new 8-bit PIL image.
    """

    def __init__(self, gamma=1.0):
        # Exponent applied to the intensities after division by 255.
        self.gamma = gamma

    def __call__(self, img):
        """Return the gamma-corrected version of ``img`` as a PIL image."""
        normalised = np.array(img).astype(np.float32) / 255.0
        rescaled = np.power(normalised, self.gamma) * 255
        # Clip before the uint8 cast so out-of-range values cannot wrap around.
        as_uint8 = np.clip(rescaled, 0, 255).astype(np.uint8)
        return Image.fromarray(as_uint8)

# Load the label table; "Finding Labels" holds pipe-separated finding names.
df = pd.read_csv("/student/csc490_project/shared/labels.csv")
df["label_list"] = df["Finding Labels"].apply(lambda findings: findings.split("|"))

# Multi-hot encode the findings with columns in CLASSES order, then store one
# label vector per row.
mlb = MultiLabelBinarizer(classes=CLASSES)
labels_array = mlb.fit_transform(df["label_list"])
df["labels"] = list(labels_array)

# Patient-level split: 70% train, 10% val, 20% test
# Splitting on patient IDs (not rows) keeps all images of one patient in a
# single partition. The fixed seed makes the shuffle reproducible.
unique_patients = df["Patient ID"].unique()
np.random.seed(42)
np.random.shuffle(unique_patients)

n_patients = len(unique_patients)
train_end = int(0.7 * n_patients)
val_end = int(0.8 * n_patients)

train_patients, val_patients, test_patients = (
    unique_patients[:train_end],
    unique_patients[train_end:val_end],
    unique_patients[val_end:],
)

patient_ids = df["Patient ID"]
train_df = df.loc[patient_ids.isin(train_patients)].reset_index(drop=True)
val_df = df.loc[patient_ids.isin(val_patients)].reset_index(drop=True)
test_df = df.loc[patient_ids.isin(test_patients)].reset_index(drop=True)

class ChestXrayDataset(Dataset):
    """Map-style dataset pairing image files with per-row label vectors.

    Args:
        df: DataFrame with an "Image Index" column (file name joined onto
            ``root_dir``) and a "labels" column (one label vector per row).
        root_dir: Directory that contains the image files.
        transform: Optional callable applied to the grayscale PIL image
            before it is returned.
    """

    def __init__(self, df, root_dir, transform=None):
        self.df = df
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the backing DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, labels)`` for the row at position ``idx``.

        ``image`` is the file converted to mode "L" and passed through
        ``transform`` when one was given; ``labels`` is a float tensor built
        from the row's "labels" entry.
        """
        # One positional lookup, reused for both the file name and the labels.
        row = self.df.iloc[idx]
        img_path = os.path.join(self.root_dir, row["Image Index"])
        # Image.open is lazy and keeps the file open; the context manager
        # closes the handle once convert() has produced an independent image,
        # instead of leaving it to garbage collection.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert("L")
        labels = torch.tensor(row["labels"], dtype=torch.float)
        if self.transform:
            image = self.transform(image)
        return image, labels

# Constants shared by both pipelines. The mean/std values are presumably the
# ImageNet channel statistics used by the pretrained backbones -- TODO confirm.
_INPUT_SIZE = (224, 224)
_CHANNEL_MEAN = [0.485, 0.456, 0.406]
_CHANNEL_STD = [0.229, 0.224, 0.225]

# Training pipeline: replicate the gray channel to 3 channels, apply a random
# horizontal flip and a random rotation (degrees argument 15), then resize,
# convert to a tensor and normalise.
_train_steps = [
    transforms.Grayscale(num_output_channels=3),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.Resize(_INPUT_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(_CHANNEL_MEAN, _CHANNEL_STD),
]
train_transform = transforms.Compose(_train_steps)

# Validation/test pipeline: the same steps without the random augmentations.
_eval_steps = [
    transforms.Grayscale(num_output_channels=3),
    transforms.Resize(_INPUT_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(_CHANNEL_MEAN, _CHANNEL_STD),
]
val_transform = transforms.Compose(_eval_steps)

img_dir = "/student/csc490_project/shared/preprocessed_images/preprocessed_images"

# One dataset per split; validation and test share the non-augmenting pipeline.
train_dataset = ChestXrayDataset(df=train_df, root_dir=img_dir, transform=train_transform)
val_dataset = ChestXrayDataset(df=val_df, root_dir=img_dir, transform=val_transform)
test_dataset = ChestXrayDataset(df=test_df, root_dir=img_dir, transform=val_transform)

# Every loader uses the same settings; note that shuffling is off for all
# splits, including train.
_loader_options = {"batch_size": 16, "shuffle": False, "num_workers": 4}
train_loader = DataLoader(train_dataset, **_loader_options)
val_loader = DataLoader(val_dataset, **_loader_options)
test_loader = DataLoader(test_dataset, **_loader_options)

# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Short display name -> timm architecture identifier. Each checkpoint file is
# named "no_augment_<architecture>_model.pth" inside CHECKPOINT_DIR, so the
# path can be derived from the identifier instead of being spelled out per model.
MODEL_ARCHS = {
    'maxvit': 'maxvit_rmlp_base_rw_224.sw_in12k_ft_in1k',
    'densenet': 'densenet121',
    'coatnet': 'coatnet_2_rw_224.sw_in12k_ft_in1k',
    'vgg19': 'vgg19.tv_in1k',
    'swin': 'swin_large_patch4_window7_224',
    'convnext': 'convnext_large.fb_in22k',
}
CHECKPOINT_DIR = '/student/csc490_project/shared/new_split_models'

models = {}
for model_name, arch in MODEL_ARCHS.items():
    # pretrained=False: the weights come from the local checkpoint below.
    model = timm.create_model(arch, pretrained=False, num_classes=len(CLASSES))

    checkpoint_path = os.path.join(CHECKPOINT_DIR, f'no_augment_{arch}_model.pth')
    # map_location='cpu' lets checkpoints saved from a GPU load on a CPU-only
    # host (the `device` fallback); the model is moved to `device` afterwards.
    # NOTE: torch.load unpickles the file -- only load checkpoints from a
    # trusted location.
    state_dict = torch.load(checkpoint_path, map_location='cpu')
    model.load_state_dict(state_dict)

    model.to(device)
    model.eval()  # inference mode for dropout / batch-norm layers
    models[model_name] = model


In [4]:
# Function to evaluate a model on the test set
def evaluate_model(model, dataloader, device):
    """Compute per-class AUROC for ``model`` over every batch of ``dataloader``.

    Args:
        model: Callable (normally a ``torch.nn.Module``) mapping a batch of
            images to one logit per label column.
        dataloader: Iterable yielding ``(images, labels)`` tensor pairs.
        device: Device the images are moved to before the forward pass.

    Returns:
        A list with one AUROC per label column, in column order. An entry is
        NaN when ``roc_auc_score`` raises ``ValueError`` for that column
        (e.g. only one class is present in the targets).

    Raises:
        ValueError: If ``dataloader`` yields no batches.
    """
    all_targets = []
    all_outputs = []

    # Evaluate in eval mode regardless of the state the caller left the model
    # in, and put it back into training mode afterwards if that is how it came.
    was_training = getattr(model, "training", False)
    if was_training:
        model.eval()
    try:
        with torch.no_grad():
            for images, labels in dataloader:
                # Only the images are needed on the device; labels are
                # consumed on the host by the metric.
                outputs = torch.sigmoid(model(images.to(device)))

                all_targets.append(labels.cpu().numpy())
                all_outputs.append(outputs.cpu().numpy())
    finally:
        if was_training:
            model.train()

    if not all_targets:
        raise ValueError("dataloader yielded no batches; cannot compute AUROC")

    all_targets = np.vstack(all_targets)
    all_outputs = np.vstack(all_outputs)

    auroc_scores = []
    # Column count comes from the data itself rather than a module-level list.
    for i in range(all_targets.shape[1]):
        try:
            score = roc_auc_score(all_targets[:, i], all_outputs[:, i])
            auroc_scores.append(score)
        except ValueError:
            # AUROC is undefined for this column; keep a placeholder so the
            # result stays aligned with the label columns.
            auroc_scores.append(float('nan'))

    return auroc_scores

# Evaluate every loaded model on the test loader and print a per-class AUROC
# report followed by the NaN-ignoring mean and a separator line.
separator = "-" * 50
for model_name, model in models.items():
    print(f"Evaluating {model_name}...")
    auroc_scores = evaluate_model(model, test_loader, device)

    report_lines = [f"AUROC Scores for {model_name}:"]
    report_lines.extend(
        f"{cls}: {score:.4f}" for cls, score in zip(CLASSES, auroc_scores)
    )
    report_lines.append(f"Mean AUROC: {np.nanmean(auroc_scores):.4f}")
    report_lines.append(separator)
    print("\n".join(report_lines))


Evaluating maxvit...
AUROC Scores for maxvit:
No Finding: 0.7865
Atelectasis: 0.8157
Cardiomegaly: 0.8958
Effusion: 0.8831
Infiltration: 0.7187
Mass: 0.8622
Nodule: 0.7851
Pneumonia: 0.7533
Pneumothorax: 0.8785
Consolidation: 0.8072
Edema: 0.9028
Emphysema: 0.9291
Fibrosis: 0.8369
Pleural_Thickening: 0.8143
Hernia: 0.9077
Mean AUROC: 0.8385
--------------------------------------------------
Evaluating densenet...
AUROC Scores for densenet:
No Finding: 0.7867
Atelectasis: 0.8156
Cardiomegaly: 0.8871
Effusion: 0.8821
Infiltration: 0.7166
Mass: 0.8513
Nodule: 0.7687
Pneumonia: 0.7795
Pneumothorax: 0.8760
Consolidation: 0.8044
Edema: 0.9023
Emphysema: 0.9190
Fibrosis: 0.8106
Pleural_Thickening: 0.8285
Hernia: 0.8451
Mean AUROC: 0.8316
--------------------------------------------------
Evaluating coatnet...
AUROC Scores for coatnet:
No Finding: 0.7895
Atelectasis: 0.8149
Cardiomegaly: 0.8947
Effusion: 0.8847
Infiltration: 0.7257
Mass: 0.8624
Nodule: 0.7801
Pneumonia: 0.7675
Pneumothorax: 0.