In [None]:
# Load test data

import torch
from torch.utils.data import Dataset
import pandas as pd
from PIL import Image
import os

# Custom Dataset
class ChestXrayDataset(Dataset):
    def __init__(self, df:pd.DataFrame, transform=None):
        self.df = df
        self.transform = transform
    
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):
        img_path = self.df.iloc[idx]['path']
        label = self.df.iloc[idx]['label']
        image = Image.open(img_path).convert('RGB')
        
        if self.transform:
            image = self.transform(image)
        
        return image, label
    
def resolve_data_path(data_dir: str):
    normal_dir = os.path.join(data_dir, 'NORMAL')
    pneumonia_dir = os.path.join(data_dir, 'PNEUMONIA')

    normal_paths = [os.path.join(data_dir, 'NORMAL', f) for f in os.listdir(normal_dir) if f.endswith(('.jpeg', '.jpg', '.png'))]
    pneumonia_paths = [os.path.join(data_dir, 'PNEUMONIA', f) for f in os.listdir(pneumonia_dir) if f.endswith(('.jpeg', '.jpg', '.png'))]
    pathes = normal_paths + pneumonia_paths
    labels = [0]*len(normal_paths) + [1]*len(pneumonia_paths)
    return pd.DataFrame({'path': pathes, 'label': labels})

test_df = resolve_data_path("datasets/test")
test_df.head(10)

Unnamed: 0,path,label
0,datasets/test/NORMAL/NORMAL2-IM-0246-0001-0001...,0
1,datasets/test/NORMAL/IM-0073-0001.jpeg,0
2,datasets/test/NORMAL/IM-0069-0001.jpeg,0
3,datasets/test/NORMAL/IM-0005-0001.jpeg,0
4,datasets/test/NORMAL/NORMAL2-IM-0368-0001.jpeg,0
5,datasets/test/NORMAL/NORMAL2-IM-0251-0001.jpeg,0
6,datasets/test/NORMAL/NORMAL2-IM-0280-0001.jpeg,0
7,datasets/test/NORMAL/NORMAL2-IM-0307-0001.jpeg,0
8,datasets/test/NORMAL/IM-0039-0001.jpeg,0
9,datasets/test/NORMAL/NORMAL2-IM-0300-0001.jpeg,0


In [3]:
# Transforms and Data loader
from torchvision import transforms
from torch.utils.data import DataLoader

test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

test_dataset = ChestXrayDataset(test_df, transform=test_transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [7]:
# Load model
from torchvision import models

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def load_model(model_path: str):
    model = models.resnet50(weights=None)
    num_ftrs = model.fc.in_features
    model.fc = torch.nn.Linear(num_ftrs, 2)  # Binary classification
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()
    return model

# Evaluation function
def eval_epoch(model, dataloader, criterion):
    model.eval()
    
    all_preds, all_trues, all_probs = [], [], []
    val_loss = 0.0
    total = 0
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            val_loss += criterion(outputs, labels).item() * inputs.size(0)
            total += labels.size(0)
            # q: why [:, 1]?
            # a: get the probabilities for the positive class
            probs = torch.softmax(outputs, dim=1)[:, 1]
            preds = (probs >= 0.5).float()

            all_probs.extend(probs.cpu().numpy())
            all_preds.extend(preds.cpu().numpy())
            all_trues.extend(labels.cpu().numpy())
    val_loss /= total
    return val_loss, all_preds, all_trues, all_probs

In [8]:
# Test all the models and get the best one 
import torch.nn as nn
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
import glob
model_paths = glob.glob("*.pth")

criterion = nn.CrossEntropyLoss()
best_auc = 0.0
best_model_path = ""    
for model_path in model_paths:
    model = load_model(model_path).to(device)
    val_loss, all_preds, all_trues, all_probs = eval_epoch(model, test_loader, criterion)
    
    acc = accuracy_score(all_trues, all_preds)
    prec = precision_score(all_trues, all_preds)
    rec = recall_score(all_trues, all_preds)
    f1 = f1_score(all_trues, all_preds)
    auc = roc_auc_score(all_trues, all_probs)
    
    print(f"Model: {model_path}")
    print(f"Test Loss: {val_loss:.4f}, Acc: {acc:.4f}, Prec: {prec:.4f}, Rec: {rec:.4f}, F1: {f1:.4f}, AUC: {auc:.4f}")
    
    if auc > best_auc:
        best_auc = auc
        best_model_path = model_path

print(f"Best Model: {best_model_path} with AUC: {best_auc:.4f}")

Model: best_model_fold_1.pth
Test Loss: 0.9217, Acc: 0.8478, Prec: 0.8054, Rec: 0.9974, F1: 0.8912, AUC: 0.9620
Model: best_model_fold_2.pth
Test Loss: 0.5701, Acc: 0.8622, Prec: 0.8262, Rec: 0.9872, F1: 0.8995, AUC: 0.9740
Model: best_model_fold_5.pth
Test Loss: 0.8975, Acc: 0.8221, Prec: 0.7796, Rec: 0.9974, F1: 0.8751, AUC: 0.9619
Model: best_model_fold_4.pth
Test Loss: 0.7574, Acc: 0.8285, Prec: 0.7859, Rec: 0.9974, F1: 0.8791, AUC: 0.9663
Model: best_model_fold_3.pth
Test Loss: 0.6809, Acc: 0.8510, Prec: 0.8087, Rec: 0.9974, F1: 0.8932, AUC: 0.9733
Best Model: best_model_fold_2.pth with AUC: 0.9740
