VGG16 CXR

In [1]:

from torch.utils.data import Dataset
import random

class XRayDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.images = []
        self.labels = []

        # Loop through each label directory
        for label in [0, 1]:
            label_dir = os.path.join(root_dir, f'processed_label_{label}')
            for folder_name in os.listdir(label_dir):
                folder_path = os.path.join(label_dir, folder_name)
                image_name = os.listdir(folder_path)[0]  # Assuming only one image per folder
                if image_name.endswith('.jpg'):
                    self.images.append(os.path.join(folder_path, image_name))
                    self.labels.append(label)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image_path = self.images[idx]
        image = Image.open(image_path).convert('RGB')
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label
    
    def get_labels(self):
        return self.labels


In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms
from torch.utils.data import DataLoader, Subset, random_split
from sklearn.metrics import roc_auc_score, accuracy_score
from sklearn.model_selection import StratifiedKFold
import numpy as np
import os
from PIL import Image
from torch.utils.data import Dataset

def set_seed(seed_value):
    """Set seed for reproducibility."""
    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed_value)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# Set a seed value
seed = 42
set_seed(seed)


def train(model, train_loader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * images.size(0)
    return running_loss / len(train_loader.dataset)

def evaluate(model, val_loader, device):
    model.eval()
    all_labels = []
    all_preds = []
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, preds = torch.max(outputs, 1)
            all_labels.extend(labels.cpu().numpy())
            all_preds.extend(preds.cpu().numpy())
    return accuracy_score(all_labels, all_preds), roc_auc_score(all_labels, all_preds)

def main():
    # Initialize device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Set seed for reproducibility
    set_seed(42)

    # Initialize dataset
    xray_dataset = XRayDataset(root_dir='D:/Aspire_xray/xray', transform=transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ]))

    # Prepare Stratified K-Fold
    skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
    accuracies = []
    aucs = []
    num_epochs = 10  # Specify the number of epochs

    for fold, (train_idx, val_idx) in enumerate(skf.split(np.zeros(len(xray_dataset)), xray_dataset.get_labels())):

        # Split dataset
        train_subset = Subset(xray_dataset, train_idx)
        val_subset = Subset(xray_dataset, val_idx)

        train_loader = DataLoader(train_subset, batch_size=32, shuffle=True)
        val_loader = DataLoader(val_subset, batch_size=32)

        # Initialize and modify the pre-trained ResNet-50 model
        model = models.resnet50(pretrained=True)  # Load with pretrained weights
        for param in model.parameters():
            param.requires_grad = False  # Freeze pretrained weights
        
        num_ftrs = model.fc.in_features
        model.fc = nn.Linear(num_ftrs, 2)  # Adjust for binary classification
        model = model.to(device)

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=0.001)

        # Train for multiple epochs
        for epoch in range(num_epochs):
            train_loss = train(model, train_loader, criterion, optimizer, device)
            accuracy, auc = evaluate(model, val_loader, device)
        

        accuracies.append(accuracy)
        aucs.append(auc)


    print(f"Mean Accuracy: {np.mean(accuracies):.4f}, STD: {np.std(accuracies):.4f}")
    print(f"Mean AUC: {np.mean(aucs):.4f}, STD: {np.std(aucs):.4f}")

if __name__ == "__main__":
    main()




Mean Accuracy: 0.695, STD: 0.06
Mean AUC: 0.581, STD: 0.04
