In [11]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, WeightedRandomSampler
from torchvision import models
import torch.nn as nn
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support, roc_auc_score

In [12]:
# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Data transforms
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # global mean and standard deviation of the RGB channels of the ImageNet dataset.
])

# Load data
train_set = torchvision.datasets.ImageFolder(root='chest_xray/chest_xray/train', transform=transform)
val_set = torchvision.datasets.ImageFolder(root='chest_xray/chest_xray/val', transform=transform)
test_set = torchvision.datasets.ImageFolder(root='chest_xray/chest_xray/test', transform=transform)

# Extract labels from the dataset
train_labels = [label for _, label in train_set.imgs]

# Count each class's samples to calculate weights
class_counts = torch.tensor(
    [(torch.tensor(train_labels) == t).sum() for t in torch.unique(torch.tensor(train_labels), sorted=True)]
)

# Calculate weight for each class
weights = 1. / class_counts.float()

# Assign a weight to each sample
sample_weights = torch.tensor([weights[label] for label in train_labels])

# Define the sampler with these sample weights
sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True)


train_loader = DataLoader(train_set, batch_size=32, sampler=sampler)
val_loader = DataLoader(val_set, batch_size=32, shuffle=False)
test_loader = DataLoader(test_set, batch_size=32, shuffle=False)

In [3]:
# Model setup
model = models.resnet18(pretrained=True)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 2)  # '2' for Pneumonia/Normal classes
model = model.to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# TensorBoard
writer = SummaryWriter('runs/pneumonia_detection_experiment')



In [4]:
# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {loss.item():.4f}')
            writer.add_scalar('training loss', loss.item(), epoch * len(train_loader) + i)

torch.save(model, 'model_pretrained.pth')

Epoch [1/1], Step [100/163], Loss: 0.1107


In [5]:

# Evaluation
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Test Accuracy of the model on the test images: {} %'.format(100 * correct / total))
    writer.add_scalar('test accuracy', 100 * correct / total, epoch)

writer.close()

Test Accuracy of the model on the test images: 86.69871794871794 %


In [6]:
def evaluate_model(model, data_loader):    
    # Evaluation
    model.eval()
    y_pred = []
    y_true = []
    y_proba = []
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            y_pred.extend(predicted.cpu().numpy())
            y_true.extend(labels.cpu().numpy())
            probabilities = torch.nn.functional.softmax(outputs, dim=1)
            y_proba.extend(probabilities[:, 1].cpu().numpy())
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        # Calculate metrics
        accuracy = correct / total
        cm = confusion_matrix(y_true, y_pred)
        precision, recall, f1_score, _ = precision_recall_fscore_support(y_true, y_pred, average='binary')
        TN, FP, FN, TP = cm.ravel()
        specificity = TN / (TN + FP)
        npv = TN / (TN + FN)  # Negative Predictive Value
        auc = roc_auc_score(y_true, y_proba)

        return accuracy, cm, precision, recall, f1_score, specificity, npv, auc

In [13]:
# Call the evaluation function on train data
accuracy, conf_matrix, precision, recall, f1_score, specificity, npv, auc = evaluate_model(model, train_loader)
print("Metrics on the train images")
print(f"Accuracy: {100* accuracy:.4f}%")
print(f"Confusion Matrix:\n{conf_matrix}")
print(f"Precision: {100* precision:.4f}%")
print(f"Recall: {100* recall:.4f}%")
print(f"F1 Score: {100* f1_score:.4f}%")
print(f"Specificity: {100* specificity:.4f}%")
print(f"Negative Predictive Value: {100* npv:.4f}%")
print(f"AUC: {100* auc:.4f}%")

print("\n\n")

Metrics on the train images
Accuracy: 85.8974%
Confusion Matrix:
[[151  83]
 [  5 385]]
Precision: 0.8226
Recall: 0.9872
F1 Score: 0.8974
Specificity: 0.6453
Negative Predictive Value: 0.9679
AUC: 0.9369





In [8]:
# Call the evaluation function on test data
accuracy, conf_matrix, precision, recall, f1_score, specificity, npv, auc = evaluate_model(model, test_loader)
print("Metrics on the train images")
print(f"Accuracy: {100* accuracy:.4f}%")
print(f"Confusion Matrix:\n{conf_matrix}")
print(f"Precision: {100* precision:.4f}%")
print(f"Recall: {100* recall:.4f}%")
print(f"F1 Score: {100* f1_score:.4f}%")
print(f"Specificity: {100* specificity:.4f}%")
print(f"Negative Predictive Value: {100* npv:.4f}%")
print(f"AUC: {100* auc:.4f}%")


Metrics on the test images
Accuracy: 0.8718
Confusion Matrix:
[[158  76]
 [  4 386]]
Precision: 0.8355
Recall: 0.9897
F1 Score: 0.9061
Specificity: 0.6752
Negative Predictive Value: 0.9753
AUC: 0.9398
