In [1]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("nih-chest-xrays/data")

print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/nih-chest-xrays/data?dataset_version_number=3...


100%|██████████| 42.0G/42.0G [32:42<00:00, 23.0MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/nih-chest-xrays/data/versions/3


In [8]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer
from PIL import Image

In [10]:
class ChestXRayDataset(Dataset):
    def __init__(self, dataframe, image_dir, transform=None, multi_label=True):
        """
        Custom Dataset for Chest X-Ray images

        Args:
            dataframe (pd.DataFrame): DataFrame containing image metadata
            image_dir (str): Directory containing X-ray images
            transform (callable, optional): Optional transform to be applied on an image
            multi_label (bool): Whether classification is multi-label
        """
        self.image_dir = image_dir
        self.transform = transform

        # Preprocessing labels
        self.multi_label = multi_label
        self.labels = dataframe['Finding Labels'].str.split('|')

        # Create unique label list
        all_labels = [label for sublist in self.labels for label in sublist]
        self.unique_labels = list(set(all_labels))

        # MultiLabelBinarizer for encoding
        self.mlb = MultiLabelBinarizer()
        self.mlb.fit([self.unique_labels])

        # Image filenames
        self.image_files = dataframe['Image Index'].tolist()

        # Encode labels
        self.encoded_labels = self.mlb.transform(self.labels)

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        img_name = os.path.join(self.image_dir, self.image_files[idx])
        image = Image.open(img_name).convert('RGB')

        if self.transform:
            image = self.transform(image)

        label = self.encoded_labels[idx]
        return image, torch.FloatTensor(label)

    def get_num_classes(self):
        return len(self.unique_labels)

    def get_class_names(self):
        return self.unique_labels

In [14]:
def create_transforms():
    """Create data augmentation and normalization transforms"""
    return transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225]
        )
    ])

In [12]:
def calculate_accuracy(outputs, targets, threshold=0.5):
    """
    Calculate accuracy for multi-label classification

    Args:
        outputs (torch.Tensor): Model predictions (logits)
        targets (torch.Tensor): True labels
        threshold (float): Threshold for positive prediction

    Returns:
        float: Accuracy of predictions
    """
    # Apply sigmoid to get probabilities
    probs = torch.sigmoid(outputs)

    # Convert to binary predictions based on threshold
    preds = (probs >= threshold).float()

    # Calculate per-sample accuracy (exact match)
    exact_match = torch.all(preds == targets, dim=1).float()
    accuracy = exact_match.mean().item()

    return accuracy

In [20]:
def train_model(model, dataloaders, criterion, optimizer, num_epochs=5):
    """
    Train the ResNet model

    Args:
        model (nn.Module): ResNet model
        dataloaders (dict): Dictionary of train and validation dataloaders
        criterion (nn.Module): Loss function
        optimizer (torch.optim): Optimizer
        num_epochs (int): Number of training epochs
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    best_loss = float('inf')

    for epoch in range(num_epochs):
        print(f'Epoch {epoch+1}/{num_epochs}')

        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_accuracy = 0.0

            with torch.set_grad_enabled(phase == 'train'):
                for inputs, labels in dataloaders[phase]:
                    inputs = inputs.to(device)
                    labels = labels.to(device)

                    optimizer.zero_grad()

                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                    running_loss += loss.item() * inputs.size(0)

                    # Calculate accuracy
                    accuracy = calculate_accuracy(outputs, labels)
                    running_accuracy += accuracy * inputs.size(0)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_accuracy = running_accuracy / len(dataloaders[phase].dataset)

            print(f'{phase} Loss: {epoch_loss:.4f}, {phase} Accuracy: {epoch_accuracy:.4f}')

            if phase == 'val' and epoch_loss < best_loss:
                best_loss = epoch_loss
                torch.save(model.state_dict(), 'best_model.pth')

In [21]:
def main():
    # Load metadata
    df = pd.read_csv('3/Data_Entry_2017.csv')

    # Split data
    train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)

    # Create datasets and dataloaders
    transform = create_transforms()

    train_dataset = ChestXRayDataset(
        train_df,
        image_dir='allimages',
        transform=transform
    )

    val_dataset = ChestXRayDataset(
        val_df,
        image_dir='allimages',
        transform=transform
    )

    dataloaders = {
        'train': DataLoader(train_dataset, batch_size=32, shuffle=True),
        'val': DataLoader(val_dataset, batch_size=32)
    }

    # Initialize model
    num_classes = train_dataset.get_num_classes()
    class_names = train_dataset.get_class_names()
    print("Number of classes:", num_classes)
    print("Class names:", class_names)

    model = models.resnet50(pretrained=True)
    model.fc = nn.Linear(model.fc.in_features, num_classes)

    # Loss and optimizer
    criterion = nn.BCEWithLogitsLoss()  # For multi-label classification
    optimizer = optim.Adam(model.parameters(), lr=0.0001)

    # Train
    train_model(model, dataloaders, criterion, optimizer)

if __name__ == "__main__":
    main()

Number of classes: 15
Class names: ['Atelectasis', 'Effusion', 'Emphysema', 'Pneumothorax', 'Infiltration', 'Nodule', 'Edema', 'Pleural_Thickening', 'Pneumonia', 'Hernia', 'Cardiomegaly', 'Fibrosis', 'Consolidation', 'No Finding', 'Mass']




Epoch 1/5
train Loss: 0.1903, train Accuracy: 0.4127
val Loss: 0.1820, val Accuracy: 0.4422
Epoch 2/5
train Loss: 0.1804, train Accuracy: 0.4236
val Loss: 0.1787, val Accuracy: 0.4218
Epoch 3/5
train Loss: 0.1769, train Accuracy: 0.4310
val Loss: 0.1769, val Accuracy: 0.4368
Epoch 4/5
train Loss: 0.1743, train Accuracy: 0.4366
val Loss: 0.1746, val Accuracy: 0.4479
Epoch 5/5
train Loss: 0.1718, train Accuracy: 0.4414
val Loss: 0.1743, val Accuracy: 0.4408
