In [None]:
# =============================================================================
# Chest X-Ray Multi-label Classification Using Pretrained TorchXRayVision Model
#
# This script loads a cleaned test dataset of chest X-ray images and uses a 
# pretrained DenseNet (from torchxrayvision) to perform multi-label classification.
#
# The workflow is as follows:
# 1. Data Loading & Preprocessing:
#    - Load test data from a parquet file.
#    - Build a lookup dictionary for image paths.
# 2. Custom Dataset:
#    - Define a PyTorch Dataset to load images and associated labels.
# 3. Data Transformations:
#    - Resize images to 224x224 with torchxrayvision's XRayResizer.
# 4. Model Loading & Evaluation:
#    - Load a pretrained DenseNet from torchxrayvision.
#    - Run inference over the test set using an Ignite Engine.
# 5. Threshold Optimization:
#    - Compute ROC curves for each class and determine an optimal threshold.
# 6. Metrics & Visualization:
#    - Print classification reports and plot ROC curves.
#
# Note: Update file paths as necessary.
# =============================================================================

In [2]:
import os
import glob
import gc
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as patches

import torch
import torch.nn as nn
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import torchvision.transforms as transforms
import torchvision.transforms.v2 as v2
from torchvision import models

import torchxrayvision as xrv

from ignite.engine import Engine, Events, create_supervised_trainer, create_supervised_evaluator
from ignite.handlers import EarlyStopping, ModelCheckpoint, TerminateOnNan
from ignite.contrib.handlers import ProgressBar
from ignite.metrics import Loss, Accuracy, Precision, Recall

from tqdm.notebook import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import (classification_report, confusion_matrix, 
                             precision_score, recall_score, f1_score, roc_curve, auc)
torch.manual_seed(42)

  from tqdm.autonotebook import tqdm


<torch._C.Generator at 0x218da3be970>

In [3]:
# Data Loading and Preprocessing

# Root directory of the chest X-ray images. The XRAY_DATA_DIR environment
# variable overrides the default so the notebook is not tied to one machine.
record_path = os.environ.get(
    "XRAY_DATA_DIR",
    r"C:\Users\piotr\.cache\kagglehub\datasets\nih-chest-xrays\data\versions\3",
)

# Gather all PNG image paths recursively.
image_paths = glob.glob(os.path.join(record_path, "**", "*.png"), recursive=True)

# Map each image filename to its full path for quick lookup by the dataset.
# If two files share a basename, the one listed last by glob wins.
image_path_dict = {os.path.basename(path): path for path in image_paths}

# Load the cleaned test dataframe. The XRAY_TEST_PARQUET environment variable
# overrides the default location (a file in the current working directory).
df_test = pd.read_parquet(
    os.environ.get("XRAY_TEST_PARQUET", "df_test_cleaned.parquet")
)

FileNotFoundError: [Errno 2] No such file or directory: 'df_test_cleaned.parquet'

In [None]:
# Define Custom Dataset for X-Ray Images

class XrayDataset(Dataset):
    """Chest X-ray dataset yielding ``(image, label)`` pairs for multi-label classification.

    Args:
        df: DataFrame with an ``'Image Index'`` column holding the image filename.
            Every column after the first is read as a label value, so
            ``'Image Index'`` is presumably the first column (confirm against
            the parquet schema).
        image_path_dict: Mapping from image filename to its full path on disk.
        transform: Optional callable applied to the single-channel image array.
    """

    def __init__(self, df, image_path_dict, transform=None):
        self.df = df
        self.image_path_dict = image_path_dict  # Dictionary for quick lookup
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the dataframe."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load the image and label vector for the row at position ``idx``.

        Returns:
            Tuple ``(img, label)``: ``img`` is the normalized single-channel image
            with a leading channel axis (after ``transform``, if one was given);
            ``label`` is a tensor built from the row's label columns cast to float.

        Raises:
            KeyError: If the row's filename is not in ``image_path_dict``.
            FileNotFoundError: If OpenCV cannot read the image file.
        """
        row = self.df.iloc[idx]  # fetch the row once and reuse it for name and labels
        image_name = row['Image Index']
        image_path = self.image_path_dict[image_name]

        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread signals a missing or unreadable file by returning None
            # instead of raising; fail here with a message naming the file.
            raise FileNotFoundError(
                f"Could not read image '{image_name}' at {image_path}"
            )
        img = xrv.datasets.normalize(img, 255)  # convert 8-bit image to [-1024, 1024] range
        img = img.mean(2)[None, ...]  # average colour channels, then add a channel axis
        label = row.iloc[1:].values.astype(float)

        if self.transform:
            img = self.transform(img)

        return img, torch.tensor(label)

# Preprocessing pipeline: a torchvision v2 Compose holding a single step,
# torchxrayvision's XRayResizer, which rescales each image to 224x224.
transform = v2.Compose([xrv.datasets.XRayResizer(224)])

# Disease label names: every column of the test dataframe after the first.
diseases = df_test.columns.tolist()[1:]

# Test dataset and its loader (fixed order, batches of 32, pinned host memory).
test_dataset = XrayDataset(df=df_test, image_path_dict=image_path_dict, transform=transform)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=32,
    shuffle=False,
    pin_memory=True,
)

In [None]:
# Model Loading and Evaluation Setup
# Run on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Pretrained torchxrayvision DenseNet, moved to the device and switched to eval mode.
model = xrv.models.DenseNet(weights="densenet121-res224-nih")
model = model.to(device).eval()

# Accumulators filled by the evaluation step: per-sample predictions and true labels.
probs_list, labels_list = [], []

def evaluate_step(engine, batch):
    """
    Evaluation step for the Ignite engine.

    Runs the model on one batch without gradient tracking and appends, for each
    sample, the model outputs for the pathologies named in ``diseases`` to
    ``probs_list`` and the corresponding labels to ``labels_list``.

    Args:
        engine: The Ignite engine invoking this step (unused).
        batch: An ``(images, labels)`` pair.

    Returns:
        Tuple ``(outputs, label)``: the full model outputs and the labels after
        being moved to ``device``.

    Raises:
        KeyError: If an entry of ``diseases`` is not in ``model.pathologies``.
    """
    model.eval()

    # Pathology name -> output column. Built once per batch instead of once per
    # sample; if a name repeats, the last column wins (same as dict(zip(...))).
    column_of = {name: col for col, name in enumerate(model.pathologies)}
    disease_cols = [column_of[name] for name in diseases]

    with torch.no_grad():
        images, label = batch
        images = images.to(device)
        label = label.to(device)
        outputs = model(images)

        # Select the relevant columns for the whole batch and copy to host once,
        # then store one row per sample.
        probs_list.extend(outputs[:, disease_cols].cpu().numpy())
        labels_list.extend(label.cpu().numpy())

    return outputs, label

# Create an Ignite evaluator for the test set and run one pass over it.
evaluator = Engine(evaluate_step)
evaluator.run(test_loader)

# Stack the per-sample results into 2-D numeric arrays (one row per sample).
# Numeric dtypes are used instead of dtype=object so that thresholding and the
# scikit-learn metrics operate on typed arrays rather than boxed Python objects.
probs_array = np.asarray(probs_list, dtype=np.float32)
labels_array = np.asarray(labels_list, dtype=np.float64)
assert probs_array.shape == labels_array.shape, "predictions and labels are misaligned"

State:
	iteration: 682
	epoch: 1
	epoch_length: 682
	max_epochs: 1
	output: <class 'tuple'>
	batch: <class 'list'>
	metrics: <class 'dict'>
	dataloader: <class 'torch.utils.data.dataloader.DataLoader'>
	seed: <class 'NoneType'>
	times: <class 'dict'>

In [None]:
thresholds = np.linspace(0, 1, 100)

# For every candidate threshold, accumulate across classes the Euclidean distance
# between the ROC point whose threshold is nearest the candidate and the ideal
# corner (FPR = 0, TPR = 1).
total_distance = np.zeros(len(thresholds))

for i in range(labels_array.shape[1]):
    # The ROC curve does not depend on the candidate threshold, so compute it
    # once per class instead of once per (threshold, class) pair.
    fpr, tpr, thr = roc_curve(
        labels_array[:, i].astype(int), probs_array[:, i].astype(float)
    )

    # For each candidate threshold, index of the closest ROC threshold.
    closest_idx = np.abs(thr[None, :] - thresholds[:, None]).argmin(axis=1)

    # Distance to (0, 1) for every candidate at once, summed across classes.
    total_distance += np.sqrt((fpr[closest_idx] - 0) ** 2 + (tpr[closest_idx] - 1) ** 2)

# Sum of distances to (0,1) for each threshold
sum_distance_per_threshold = total_distance.tolist()

# Find the best threshold
best_threshold = thresholds[np.argmin(sum_distance_per_threshold)]
print(f"Optimal Threshold: {best_threshold:.4f}")

Optimal Threshold: 0.5253


In [None]:
# Rebuild numeric arrays from the collected outputs. The stored model outputs are
# thresholded directly; no sigmoid is applied anywhere in this notebook.
probs_array = np.asarray(probs_list, dtype=np.float32)
labels_array = np.asarray(labels_list, dtype=np.float64)

# NOTE(review): a fixed 0.5 cut-off is used here, not the `best_threshold`
# computed in the threshold-search cell -- confirm which one is intended.
preds = (probs_array > 0.5).astype(int)

# ---- Metrics on Test Set ----

y_true = labels_array.astype(int)
y_pred = preds.astype(int)

# Label names, one per output column (dataframe columns after the first).
# Computed once and reused for the report and the plot legend.
label_columns = df_test.columns.tolist()[1:]
class_names = [label_columns[i] for i in range(y_true.shape[1])]

print("\nClassification Report (Per-Class):")
print(
    classification_report(
        y_true, y_pred,
        target_names=class_names
    )
)

# Confusion matrix (argmax at the row level can be misleading for multi-label,
# but shown here for demonstration.) Rows with no positive entry fall back to
# index 0 under argmax.
cm = confusion_matrix(
    y_true.argmax(axis=1),
    y_pred.argmax(axis=1)
)
print("\nConfusion Matrix:\n", cm)

# ---- ROC Curves (Optional) ----

plt.figure(figsize=(12, 8))
for i in range(y_true.shape[1]):
    fpr, tpr, _ = roc_curve(y_true[:, i], probs_array[:, i])
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, label=f'{class_names[i]} (AUC = {roc_auc:.2f})')

plt.plot([0, 1], [0, 1], 'k--', lw=2)  # chance-level diagonal
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curves for the XRayVision pretrained model')  # was misspelled 'RUC'
plt.legend(loc='lower right')
plt.grid(True)
plt.savefig("AUC, the XRayVision pretrained model.png")
plt.show()


NameError: name 'np' is not defined

In [None]:
# Per-class metrics as a nested dictionary, labelled with the disease column names.
report_dict = classification_report(
    y_true,
    y_pred,
    target_names=[df_test.columns[1:][col] for col in range(y_true.shape[1])],
    output_dict=True,
)

# One row per class / average, one column per metric.
df = pd.DataFrame(report_dict).T

# Print the table as markdown, leaving out the support column.
print(df.drop(columns=["support"]).to_markdown())

|                    |   precision |   recall |   f1-score |
|:-------------------|------------:|---------:|-----------:|
| Pneumothorax       |   0.118278  | 0.856403 |   0.20785  |
| Mass               |   0.0957942 | 0.841717 |   0.172012 |
| Atelectasis        |   0.159004  | 0.898336 |   0.270185 |
| Nodule             |   0.0785253 | 0.772305 |   0.142556 |
| Consolidation      |   0.0842982 | 0.967277 |   0.155081 |
| Pleural_Thickening |   0.057971  | 0.842333 |   0.108476 |
| Effusion           |   0.256537  | 0.872703 |   0.396515 |
| Infiltration       |   0.240613  | 0.92679  |   0.382041 |
| micro avg          |   0.13907   | 0.88517  |   0.240375 |
| macro avg          |   0.136378  | 0.872233 |   0.22934  |
| weighted avg       |   0.173431  | 0.88517  |   0.283441 |
| samples avg        |   0.124396  | 0.478369 |   0.186056 |
