In [1]:
import os
import time
import hashlib
import numpy as np
import pandas as pd
from PIL import Image
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import MultiLabelBinarizer

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

import torchvision.transforms as transforms
import timm

# Run on the GPU when PyTorch can see a CUDA device; otherwise fall back to the CPU.
_device_type = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_type)


  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Read the labels CSV into a DataFrame.
LABELS_CSV_PATH = "/student/csc490_project/shared/labels.csv"
df = pd.read_csv(LABELS_CSV_PATH)

# The 15 target classes, in model-output order: "No Finding" followed by the 14 diseases.
CLASSES = [
    "No Finding",
    "Atelectasis",
    "Cardiomegaly",
    "Effusion",
    "Infiltration",
    "Mass",
    "Nodule",
    "Pneumonia",
    "Pneumothorax",
    "Consolidation",
    "Edema",
    "Emphysema",
    "Fibrosis",
    "Pleural_Thickening",
    "Hernia",
]

# Convert a raw "Finding Labels" value into a list of labels. Labels are '|'-separated,
# so a single-label value such as "No Finding" becomes ["No Finding"].
def process_labels(label_str):
    """Split a '|'-delimited label string into a list of label names."""
    label_names = label_str.split(sep="|")
    return label_names

# Parse every "Finding Labels" string into a list of label names.
df["label_list"] = df["Finding Labels"].apply(process_labels)

# When `classes` is given, MultiLabelBinarizer only warns about labels it does not know
# and then drops them, which would silently corrupt the targets. Fail loudly instead.
unknown_labels = {label for labels in df["label_list"] for label in labels} - set(CLASSES)
if unknown_labels:
    raise ValueError(
        f"Labels present in the CSV but missing from CLASSES: {sorted(map(str, unknown_labels))}"
    )

# Initialize MultiLabelBinarizer with the defined CLASSES so the column order is fixed.
mlb = MultiLabelBinarizer(classes=CLASSES)
labels_array = mlb.fit_transform(df["label_list"])

# One row per image, one column per class.
assert labels_array.shape == (len(df), len(CLASSES)), "unexpected one-hot label shape"

# Save the one-hot encoded labels into a new column (one vector per row).
df["labels"] = list(labels_array)

# Print some samples to verify order
print("MultiLabelBinarizer class order:", mlb.classes_)
print("First few processed labels (raw):", df["label_list"].head().tolist())
print("First few processed labels (one-hot):", df["labels"].head().tolist())


MultiLabelBinarizer class order: ['No Finding' 'Atelectasis' 'Cardiomegaly' 'Effusion' 'Infiltration'
 'Mass' 'Nodule' 'Pneumonia' 'Pneumothorax' 'Consolidation' 'Edema'
 'Emphysema' 'Fibrosis' 'Pleural_Thickening' 'Hernia']
First few processed labels (raw): [['Cardiomegaly'], ['Cardiomegaly', 'Emphysema'], ['Cardiomegaly', 'Effusion'], ['No Finding'], ['Hernia']]
First few processed labels (one-hot): [array([0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), array([0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0]), array([0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1])]


In [None]:
# Per-channel normalization statistics shared by both pipelines
# (these are the commonly used ImageNet mean/std values).
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

# Training pipeline: replicate the grayscale image to 3 channels, apply random
# flip/rotation augmentation, then resize, convert to a tensor and normalize.
train_transforms_list = [
    transforms.Grayscale(num_output_channels=3),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
]
train_transform = transforms.Compose(train_transforms_list)

# Validation pipeline: the same preprocessing without any random augmentation.
_val_steps = [
    transforms.Grayscale(num_output_channels=3),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
]
val_transform = transforms.Compose(_val_steps)


In [5]:
class ChestXrayDataset(Dataset):
    """Dataset yielding ``(image, labels)`` pairs, one per row of a DataFrame.

    The image is loaded from ``root_dir`` using the row's "Image Index" value and
    converted to single-channel grayscale; ``labels`` is the row's "labels" vector
    as a float tensor.
    """

    def __init__(self, df, root_dir, transform=None):
        """
        Args:
            df: DataFrame with "Image Index" and "labels" columns.
            root_dir: Directory where images are stored.
            transform: Optional callable applied to the loaded PIL image.
        """
        self.df = df
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load the sample at positional index ``idx``.

        Returns:
            A tuple ``(image, labels)`` where ``image`` is the grayscale image
            (after ``transform``, if one was given) and ``labels`` is a
            ``torch.float32`` tensor built from the row's "labels" value.
        """
        # Materialize the row once; each `.iloc[idx]` call builds a new Series.
        row = self.df.iloc[idx]
        img_path = os.path.join(self.root_dir, row["Image Index"])

        # `convert` returns an independent image, so the underlying file can be
        # closed deterministically as soon as the pixels have been read.
        with Image.open(img_path) as img:
            image = img.convert("L")  # load in grayscale

        # torch.tensor copies, so the tensor never aliases the array stored in the DataFrame.
        labels = torch.tensor(np.asarray(row["labels"]), dtype=torch.float32)

        if self.transform is not None:
            image = self.transform(image)
        return image, labels

In [None]:
# Default location of the preprocessed images used for both training and validation.
DEFAULT_IMAGE_DIR = "/student/csc490_project/shared/preprocessed_images/preprocessed_images"


def _per_class_auroc(y_true, y_score, class_names):
    """Return {class_name: AUROC} for each label column; NaN where AUROC is undefined."""
    scores = {}
    for col, class_name in enumerate(class_names):
        try:
            scores[class_name] = roc_auc_score(y_true[:, col], y_score[:, col])
        except ValueError as err:
            # roc_auc_score rejects e.g. a column with a single class present.
            # Keep going, but say so instead of hiding it.
            print(f"AUROC undefined for class '{class_name}': {err}")
            scores[class_name] = float('nan')
    return scores


def train_and_evaluate(model_name, train_df, val_df, epochs=7, batch_size=16,
                       image_dir=DEFAULT_IMAGE_DIR, num_workers=4):
    """Fine-tune a pretrained timm model on ``train_df`` and score it on ``val_df``.

    Args:
        model_name: Name passed to ``timm.create_model``.
        train_df: DataFrame of training samples (see ``ChestXrayDataset``).
        val_df: DataFrame of validation samples.
        epochs: Number of training epochs. NOTE: forced to 4 when ``model_name``
            is 'convnext_large.fb_in22k', regardless of the value passed in.
        batch_size: Batch size for both DataLoaders.
        image_dir: Directory containing the image files for both datasets.
        num_workers: Worker processes per DataLoader.

    Returns:
        ``(model, auroc_per_class, overall_auroc)``: the trained model (left on
        ``device`` in eval mode), a dict mapping each entry of ``CLASSES`` to its
        validation AUROC (NaN when undefined), and the NaN-ignoring mean of those values.

    Raises:
        ValueError: If ``train_df`` or ``val_df`` is empty.
    """
    # Fail before downloading/building the model rather than midway through the run.
    if len(train_df) == 0 or len(val_df) == 0:
        raise ValueError("train_df and val_df must both be non-empty")

    # Adjust epochs for specific models
    if model_name == 'convnext_large.fb_in22k':
        epochs = 4

    # Create model using timm, with one output logit per class.
    model = timm.create_model(model_name, pretrained=True, num_classes=len(CLASSES))
    model = model.to(device)

    # Multi-label setup: independent sigmoid/BCE per class.
    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-4)

    # Create DataLoaders
    train_dataset = ChestXrayDataset(train_df, root_dir=image_dir, transform=train_transform)
    val_dataset = ChestXrayDataset(val_df, root_dir=image_dir, transform=val_transform)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                              num_workers=num_workers)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False,
                            num_workers=num_workers)

    # Training loop
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # Weight by batch size so the epoch figure is a true per-sample mean
            # even when the last batch is smaller.
            running_loss += loss.item() * images.size(0)
        epoch_loss = running_loss / len(train_dataset)
        print(f"Model: {model_name} | Epoch {epoch+1}/{epochs} | Training Loss: {epoch_loss:.4f}")

    # Evaluation loop
    model.eval()
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for images, labels in val_loader:
            outputs = model(images.to(device))
            all_preds.append(torch.sigmoid(outputs).cpu().numpy())
            # Labels are only needed on the host for scoring; no device round-trip.
            all_labels.append(labels.numpy())

    all_preds = np.concatenate(all_preds, axis=0)
    all_labels = np.concatenate(all_labels, axis=0)

    # Compute AUROC for each class, then the mean over classes with a defined score.
    auroc_per_class = _per_class_auroc(all_labels, all_preds, CLASSES)
    overall_auroc = np.nanmean(list(auroc_per_class.values()))

    return model, auroc_per_class, overall_auroc

In [7]:
# timm model identifier of the architecture trained in the cross-validation run.
model_name = "swin_large_patch4_window7_224"

In [None]:
from sklearn.model_selection import KFold
import numpy as np

# Cross-validation settings
SEED = 42
n_splits = 4
CV_OUTPUT_DIR = "/student/csc490_project/shared/cv_no_overlap"

# Make sure checkpoints can be written *before* spending hours training a fold.
os.makedirs(CV_OUTPUT_DIR, exist_ok=True)

# Extract unique patient IDs (np.array gives us our own writable copy to shuffle in place)
unique_patients = np.array(df["Patient ID"].unique())

# Shuffle them for reproducible random folds
np.random.seed(SEED)
np.random.shuffle(unique_patients)

# KFold on patient IDs, so all images of a patient land in the same fold
kf = KFold(n_splits=n_splits, shuffle=False)  # We already shuffled above

# Create a list of (train_pat_idx, val_pat_idx) for each fold
folds = list(kf.split(unique_patients))

# Containers for per-fold results
fold_overall_scores = []
fold_class_scores = []
# NOTE(review): train_and_evaluate returns the model still on `device`, so every fold's
# model stays resident there while it is referenced from this list. Confirm that is
# intended; otherwise reload the saved checkpoints when needed.
fold_models = []

for fold_idx, (train_pat_idx, val_pat_idx) in enumerate(folds):
    print(f"\n--- Fold {fold_idx+1} ---")

    # Identify patient IDs for training vs. validation
    train_patients = unique_patients[train_pat_idx]
    val_patients = unique_patients[val_pat_idx]
    assert not set(train_patients) & set(val_patients), "patient leakage between train and val"

    # Print a few patient IDs to verify the split
    print(f"Train patient IDs (first 5): {train_patients[:5]}")
    print(f"Val patient IDs (first 5): {val_patients[:5]}")

    # Filter the main DataFrame by these patient IDs
    train_fold_df = df[df["Patient ID"].isin(train_patients)].reset_index(drop=True)
    val_fold_df = df[df["Patient ID"].isin(val_patients)].reset_index(drop=True)
    assert len(train_fold_df) + len(val_fold_df) == len(df), "fold does not partition the data"

    # Train and evaluate the model on this fold
    model, auroc_per_class, overall_auroc = train_and_evaluate(
        model_name,
        train_fold_df,
        val_fold_df,
        epochs=7,
        batch_size=16
    )

    # Save results
    fold_models.append(model)
    fold_class_scores.append(auroc_per_class)
    fold_overall_scores.append(overall_auroc)

    # Print fold results
    print("Per-class AUROC for this fold:", auroc_per_class)
    print("Overall AUROC for this fold:", overall_auroc)

    # Save the model for this fold
    fold_model_save_path = os.path.join(CV_OUTPUT_DIR, f"{model_name}_fold{fold_idx+1}_model.pth")
    torch.save(model.state_dict(), fold_model_save_path)
    print(f"Model for Fold {fold_idx+1} saved to {fold_model_save_path}")



--- Fold 1 ---
Train indices (first 5): [0 1 2 4 5]
Val indices (first 5): [ 3 23 24 31 35]
Model: swin_large_patch4_window7_224 | Epoch 1/7 | Training Loss: 0.1918
Model: swin_large_patch4_window7_224 | Epoch 2/7 | Training Loss: 0.1826
Model: swin_large_patch4_window7_224 | Epoch 3/7 | Training Loss: 0.1818
Model: swin_large_patch4_window7_224 | Epoch 4/7 | Training Loss: 0.1815
Model: swin_large_patch4_window7_224 | Epoch 5/7 | Training Loss: 0.1812
Model: swin_large_patch4_window7_224 | Epoch 6/7 | Training Loss: 0.1806
Model: swin_large_patch4_window7_224 | Epoch 7/7 | Training Loss: 0.1790
Per-class AUROC for this fold: {'No Finding': np.float64(0.7757228493431308), 'Atelectasis': np.float64(0.7969011926833625), 'Cardiomegaly': np.float64(0.9004284018637561), 'Effusion': np.float64(0.8714618294815274), 'Infiltration': np.float64(0.7069278518945811), 'Mass': np.float64(0.8525617154705323), 'Nodule': np.float64(0.7392565017579583), 'Pneumonia': np.float64(0.7370958098557995), 'Pne

In [None]:
# Summarize the per-fold overall AUROC: mean and (population) standard deviation.
avg_overall_auroc = np.mean(fold_overall_scores)
std_overall_auroc = np.std(fold_overall_scores)

print("\n=== Final Cross Validation Results ===")
for fold_number, fold_score in enumerate(fold_overall_scores, start=1):
    print(f"Fold {fold_number} Overall AUROC: {fold_score:.4f}")
print(f"Average Overall AUROC: {avg_overall_auroc:.4f}")
print(f"Standard Deviation of Overall AUROC: {std_overall_auroc:.4f}")

# For every class, average its AUROC over the folds, skipping folds where it was NaN.
avg_class_auroc = {
    class_name: np.nanmean([fold_scores[class_name] for fold_scores in fold_class_scores])
    for class_name in CLASSES
}
print("Average Per-class AUROC:", avg_class_auroc)


=== Final Cross Validation Results ===
Fold 1 Overall AUROC: 0.8111
Fold 2 Overall AUROC: 0.8421
Fold 3 Overall AUROC: 0.8369
Fold 4 Overall AUROC: 0.8425
Fold 5 Overall AUROC: 0.8383
Average Overall AUROC: 0.8342
Standard Deviation of Overall AUROC: 0.0118
Average Per-class AUROC: {'No Finding': np.float64(0.7840629407983292), 'Atelectasis': np.float64(0.8140148534860007), 'Cardiomegaly': np.float64(0.9072401783438397), 'Effusion': np.float64(0.8821612479701747), 'Infiltration': np.float64(0.7190986173582831), 'Mass': np.float64(0.8572114105691714), 'Nodule': np.float64(0.7607789508640679), 'Pneumonia': np.float64(0.7641870034775156), 'Pneumothorax': np.float64(0.8914130149789129), 'Consolidation': np.float64(0.8104549067660731), 'Edema': np.float64(0.9027589142979335), 'Emphysema': np.float64(0.9237786204895077), 'Fibrosis': np.float64(0.814705326196999), 'Pleural_Thickening': np.float64(0.803173651291935), 'Hernia': np.float64(0.8777351210407456)}
