**Deep Learning-based Pneumonia Detection Using Chest X-Ray Images**

Programming Assignment #1

CAP 5516 - Medical Image Computing (Spring 2026)



*Darinka Townsend*

**Libraries**

In [None]:
!pip install tqdm

In [None]:
import os
from collections import Counter
import numpy as np
import matplotlib.pyplot as plt
import torch
from torch.utils.data import DataLoader, WeightedRandomSampler
from torchvision import datasets, transforms
from PIL import Image
from tqdm import tqdm
import kagglehub

# Modules
import utils
import model
import train_utils

import importlib
importlib.reload(utils)
importlib.reload(model)
importlib.reload(train_utils)


**DataSet**

In [None]:
# Fetch the Kaggle chest X-ray pneumonia dataset; the returned value is used
# below as the local directory that holds the downloaded files.
path = kagglehub.dataset_download("paultimothymooney/chest-xray-pneumonia")
print("Path to dataset files:", path)
# All splits used in this notebook are read from the "chest_xray" sub-folder.
DATA_DIR = os.path.join(path, "chest_xray")
# Sanity check: list what the dataset folder actually contains.
print(os.listdir(DATA_DIR))

In [None]:
# Preview four training images from each class folder.
train_folder = os.path.join(DATA_DIR, "train")
for class_name in ("NORMAL", "PNEUMONIA"):
    utils.show_images(train_folder, class_name, n=4)

In [None]:
# Report the number of images found in each split directory.
for split_label, split_name in (("Train", "train"), ("Val", "val"), ("Test", "test")):
    split_path = os.path.join(DATA_DIR, split_name)
    print(f"{split_label} images:", utils.count_images(split_path))

In [None]:
# Resolve the three split directories under DATA_DIR.
train_dir, val_dir, test_dir = (
    os.path.join(DATA_DIR, split_name) for split_name in ("train", "val", "test")
)

# Per-class image counts for every split (same order: train, val, test).
train_counts, val_counts, test_counts = (
    utils.class_counts(split_dir) for split_dir in (train_dir, val_dir, test_dir)
)

# The tags are padded so the printed values line up in one column.
for tag, counts in (("TRAIN:", train_counts), ("VAL:  ", val_counts), ("TEST: ", test_counts)):
    print(tag, counts)

In [None]:
# Show the class distribution of each split as percentages.
for split_counts, split_tag in (
    (train_counts, "TRAIN"),
    (val_counts, "VAL"),
    (test_counts, "TEST"),
):
    utils.print_percentages(split_counts, split_tag)

In [None]:
# One bar chart of class counts per split, in train / val / test order.
for split_counts, plot_title in (
    (train_counts, "Train split class counts"),
    (val_counts, "Val split class counts"),
    (test_counts, "Test split class counts"),
):
    utils.barplot_counts(split_counts, plot_title)

# **Data Augmentation**

*Train DataSet*

In [None]:
# Training-time pipeline: light random augmentation followed by tensor
# conversion and per-channel normalisation.
train_transforms = transforms.Compose(
    [
        # Convert to grayscale but keep three output channels.
        transforms.Grayscale(num_output_channels=3),
        # Random crop covering 85-100% of the image area, resized to 224x224.
        transforms.RandomResizedCrop(224, scale=(0.85, 1.0)),
        transforms.RandomHorizontalFlip(p=0.5),
        # Small random rotation of up to +/-10 degrees.
        transforms.RandomRotation(10),
        transforms.ColorJitter(brightness=0.1, contrast=0.1),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# ImageFolder assigns labels from the sub-folder names of the train split.
train_dataset = datasets.ImageFolder(
    root=os.path.join(DATA_DIR, "train"),
    transform=train_transforms,
)

print(f"Train dataset size: {len(train_dataset)}")
print(f"Classes: {train_dataset.classes}")



# **Class Weight**

In [None]:
# Prefer the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Per-class weights derived from the training set are passed to the loss.
class_weights = utils.compute_class_weights(train_dataset, device)
criterion = torch.nn.CrossEntropyLoss(weight=class_weights)

# **DataLoaders**

In [None]:
# Evaluation pipeline: no random operations, only resize, centre crop,
# tensor conversion and the same normalisation used for training.
eval_transforms = transforms.Compose(
    [
        transforms.Grayscale(num_output_channels=3),
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Validation and test splits share the evaluation pipeline.
val_dataset, test_dataset = (
    datasets.ImageFolder(os.path.join(DATA_DIR, split_name), transform=eval_transforms)
    for split_name in ("val", "test")
)

In [None]:
BATCH_SIZE = 32

# Settings shared by all three loaders.
common_loader_kwargs = {
    "batch_size": BATCH_SIZE,
    "num_workers": 1,
    "pin_memory": False,
}

# Only the training loader shuffles; evaluation order stays fixed.
train_loader = DataLoader(train_dataset, shuffle=True, **common_loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **common_loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **common_loader_kwargs)

for loader_label, loader in (
    ("Train", train_loader),
    ("Val", val_loader),
    ("Test", test_loader),
):
    print(f"{loader_label} batches:", len(loader))

# **Task 1.1: ResNet-18**

***Initialization***

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Task 1.1 model: built with pretrained=False and moved to the chosen device.
net = model.PneumoniaResNet18(pretrained=False).to(device)
print(net)

***Loss***

In [None]:
# Loss for Task 1.1. This repeats the statements of the "Class Weight" cell so
# the section can be re-run on its own.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Class weights are computed from the training dataset by the project helper;
# NOTE(review): presumably inverse-frequency weights - confirm in utils.
class_weights = utils.compute_class_weights(train_dataset, device)
# Cross-entropy with per-class weights.
criterion = torch.nn.CrossEntropyLoss(weight=class_weights)

***Optimizer***

In [None]:
# Adam over all parameters of the from-scratch network, with weight decay.
learning_rate = 1e-4
optimizer = torch.optim.Adam(
    params=net.parameters(),
    lr=learning_rate,
    weight_decay=1e-4,
)

In [None]:
# Echo the training configuration before starting the run.
for setting_label, setting_value in (
    ("Device:", device),
    ("Loss weights:", class_weights),
    ("LR:", optimizer.param_groups[0]["lr"]),
):
    print(setting_label, setting_value)

***Training***

In [None]:
# Train the from-scratch ResNet-18 for 10 epochs without an LR scheduler.
# The returned object is kept as `history` and plotted in the next cell.
# NOTE(review): save_path is presumably where the best checkpoint is written
# (the file is loaded back before test evaluation) - confirm in train_utils.
history = train_utils.train_model(
    model=net,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    epochs=10,
    scheduler=None,
    save_path="best_resnet18_scratch.pt",
    print_every=50
)

In [None]:
# Plot the training history returned by the Task 1.1 run.
train_utils.plot_history(history)

***Evaluation on Test***

In [None]:
# Restore the weights saved during the Task 1.1 run, mapped onto the current
# device, then switch the network to evaluation mode.
net.load_state_dict(
    torch.load("best_resnet18_scratch.pt", map_location=device)
)
net.to(device).eval()

In [None]:
# Evaluate the from-scratch model on the test split.
test_acc, per_class_acc, cm, preds, labels = train_utils.evaluate_test(
    net, test_loader, device, test_dataset.classes
)

print(f"Test Accuracy (overall): {test_acc:.4f}")
print("Test Accuracy (per class):")
for class_name, class_acc in per_class_acc.items():
    print(f"  {class_name}: {class_acc:.4f}")

In [None]:
# Visualise the Task 1.1 test confusion matrix, labelled with the class names.
train_utils.plot_confusion_matrix(cm, test_dataset.classes)

In [None]:
# Derive classification metrics from the Task 1.1 confusion matrix; the result
# is reused in the comparative analysis at the end of the notebook.
metrics = train_utils.compute_classification_metrics(cm, test_dataset.classes)

Although the overall test accuracy reached 73.7%, class-wise analysis reveals a strong bias toward the majority class (Pneumonia). The model achieves very high recall for Pneumonia (97.2%), but performs poorly in detecting Normal cases (recall 34.6%). This indicates that training from scratch on an imbalanced dataset leads to a classifier that over-predicts the dominant class.

# **Task 1.2: ResNet-18 Transfer Learning**

***Initialization***

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Task 1.2 model: same architecture, built with pretrained=True.
net_ft = model.PneumoniaResNet18(pretrained=True).to(device)
print(net_ft)

***Loss***

In [None]:
# Task 1.2 uses the same class-weighted cross-entropy loss as Task 1.1.
class_weights = utils.compute_class_weights(train_dataset, device)
criterion = torch.nn.CrossEntropyLoss(weight=class_weights)

***Optimizer***

In [None]:
# Fine-tuning uses a learning rate ten times smaller than the from-scratch run.
lr_ft = 1e-5
optimizer = torch.optim.Adam(
    params=net_ft.parameters(),
    lr=lr_ft,
    weight_decay=1e-4,
)

***Training***

In [None]:
# Fine-tune the pretrained ResNet-18 with the same settings as Task 1.1
# (10 epochs, no scheduler), using a separate checkpoint file.
history_ft = train_utils.train_model(
    model=net_ft,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    epochs=10,
    scheduler=None,
    save_path="best_resnet18_pretrained.pt",
    print_every=50
)

In [None]:
# Plot the training history returned by the Task 1.2 (transfer learning) run.
train_utils.plot_history(history_ft)

***Evaluation on Test***

In [None]:
# Restore the checkpoint saved during fine-tuning and switch to eval mode.
pretrained_ckpt = "best_resnet18_pretrained.pt"
net_ft.load_state_dict(torch.load(pretrained_ckpt, map_location=device))
net_ft.to(device).eval()

# Evaluate the fine-tuned model on the test split.
test_acc2, per_class_acc2, cm2, preds2, labels2 = train_utils.evaluate_test(
    net_ft,
    test_loader,
    device,
    test_dataset.classes,
)

print(f"Task 1.2 Test Accuracy (overall): {test_acc2:.4f}")
print("Task 1.2 Test Accuracy (per class):")
for class_name, class_acc in per_class_acc2.items():
    print(f"  {class_name}: {class_acc:.4f}")

In [None]:
# Visualise the Task 1.2 test confusion matrix, labelled with the class names.
train_utils.plot_confusion_matrix(cm2, test_dataset.classes)

In [None]:
# Derive classification metrics from the Task 1.2 confusion matrix; reused in
# the comparative analysis.
metrics2 = train_utils.compute_classification_metrics(cm2, test_dataset.classes)

The transfer learning approach significantly improved the overall performance compared to training from scratch. The test accuracy increased from 73.7% to 79.6%, and the macro F1-score improved notably. The model achieved near-perfect recall for the Pneumonia class (99.7%), indicating excellent sensitivity in detecting positive cases. However, the recall for the Normal class remains relatively low (46.1%), meaning the model still tends to over-predict Pneumonia and generate false positives. Although pretraining improved feature extraction and generalization, class imbalance continues to influence the decision boundary, suggesting that additional balancing strategies are necessary.

# **Improve with WeightedRandomSampler**

***DataLoader with WeightedRandomSampler***

In [None]:
# Build a WeightedRandomSampler that draws both classes with equal probability.
#
# ImageFolder already stores the class index of every sample in `.targets`, so
# the labels are read from there. Iterating the dataset instead would decode
# and augment every training image only to throw the pixels away.
labels = list(train_dataset.targets)
class_counts = Counter(labels)

num_classes = len(train_dataset.classes)
class_sample_count = torch.tensor(
    [class_counts[i] for i in range(num_classes)],
    dtype=torch.float
)

# Inverse-frequency weight per class: the rarer class gets the larger weight.
weights = 1.0 / class_sample_count

# Each sample inherits the weight of its class (single vectorised lookup).
sample_weights = weights[torch.tensor(labels, dtype=torch.long)]

# Draw with replacement so minority-class images can be repeated; one epoch
# still consists of as many draws as there are training samples.
sampler = WeightedRandomSampler(
    weights=sample_weights,
    num_samples=len(sample_weights),
    replacement=True
)

# Rebuild the loaders for the balanced-sampling experiment.
# NOTE: this rebinds train_loader / val_loader / test_loader, so re-running the
# Task 1.1 / 1.2 training cells after this one would use the balanced sampler.
balanced_loader_kwargs = {
    # Reuse the notebook-wide batch size instead of repeating the literal.
    "batch_size": BATCH_SIZE,
    "num_workers": 0,
    # Pinned host memory only helps host-to-GPU copies; requesting it on a
    # CPU-only machine just triggers a warning.
    "pin_memory": torch.cuda.is_available(),
}

# `sampler` and `shuffle` are mutually exclusive: the sampler alone decides the
# order (and repetition) of the training samples.
train_loader = DataLoader(
    train_dataset,
    sampler=sampler,
    **balanced_loader_kwargs
)

# Validation and test data keep their natural, unshuffled order.
val_loader = DataLoader(
    val_dataset,
    shuffle=False,
    **balanced_loader_kwargs
)

test_loader = DataLoader(
    test_dataset,
    shuffle=False,
    **balanced_loader_kwargs
)

***Initialization***

In [None]:
# Start the balanced-sampling experiment from a fresh pretrained network so it
# does not continue from the Task 1.2 fine-tuned weights.
net_ft = model.PneumoniaResNet18(pretrained=True)
net_ft = net_ft.to(device)

***Optimizer***

In [None]:
# New Adam optimizer bound to the freshly created network, with the same
# learning rate and weight decay values as the Task 1.2 optimizer cell.
optimizer = torch.optim.Adam(net_ft.parameters(), lr=1e-5, weight_decay=1e-4)

***Loss***

In [None]:
# Unweighted cross-entropy: class balance is handled by the sampler here, so
# no class weights are passed to the loss.
criterion = torch.nn.CrossEntropyLoss()

***Training***

In [None]:
# Fine-tune the pretrained network on the balanced-sampler loader
# (10 epochs, no scheduler), using a separate checkpoint file.
# print_every is not passed here, so train_model's own default applies.
history_ft_balanced = train_utils.train_model(
    model=net_ft,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    epochs=10,
    scheduler=None,
    save_path="best_resnet18_pretrained_balanced.pt"
)

In [None]:
# Plot the training history returned by the balanced-sampling run.
train_utils.plot_history(history_ft_balanced)

***Evaluation on Test***

In [None]:
# Restore the checkpoint saved during the balanced run and switch to eval mode.
balanced_ckpt = "best_resnet18_pretrained_balanced.pt"
net_ft.load_state_dict(torch.load(balanced_ckpt, map_location=device))
net_ft.eval()

# Predictions and labels are not needed afterwards, hence the two placeholders.
test_acc_bal, per_class_acc_bal, cm_bal, _, _ = train_utils.evaluate_test(
    net_ft, test_loader, device, test_dataset.classes
)

In [None]:
# Report the overall test accuracy of the balanced-sampling model.
# NOTE(review): the Task 1.1 / 1.2 cells format this value with :.4f and also
# list the per-class accuracies; per_class_acc_bal is computed but never shown.
print("Balanced Test Accuracy:", test_acc_bal)

In [None]:
# Visualise the test confusion matrix of the balanced-sampling model.
train_utils.plot_confusion_matrix(cm_bal, test_dataset.classes)

In [None]:
# Derive classification metrics from the balanced-run confusion matrix; reused
# in the comparative analysis.
metrics3 = train_utils.compute_classification_metrics(cm_bal, test_dataset.classes)

The balanced sampling strategy significantly reduced the bias toward the Pneumonia class by forcing the model to observe Normal samples more frequently during training. As a result, Normal recall increased from 46.1% to 57.7%, and the macro F1-score improved from 0.7447 to 0.8047, indicating a more balanced performance across classes. Importantly, Pneumonia recall remained very high (99.2%), showing that the improvement in Normal detection did not substantially harm sensitivity to Pneumonia. Overall, WeightedRandomSampler effectively mitigated class imbalance and improved generalization on the minority class.

# **Comparative Analysis**

In [None]:
# Plot the histories of the three runs together: from scratch, transfer
# learning, and transfer learning with the balanced sampler.
# NOTE(review): ema_beta is presumably an exponential-moving-average smoothing
# factor for the curves - confirm in train_utils.
train_utils.plot_three_histories(
    history,
    history_ft,
    history_ft_balanced,
    ema_beta=0.7
)

In [None]:
# Collect the test accuracy and metrics of each experiment under its task name,
# keeping the order in which the experiments were run.
results = {
    task_name: {"accuracy": task_accuracy, "metrics": task_metrics}
    for task_name, task_accuracy, task_metrics in (
        ("Task 1.1", test_acc, metrics),
        ("Task 1.2", test_acc2, metrics2),
        ("Task 1.2.1", test_acc_bal, metrics3),
    )
}

train_utils.compare_experiments(results)

The experimental comparison demonstrates that transfer learning significantly improves performance over training from scratch. However, class imbalance remains a critical issue. Incorporating a balanced sampling strategy further enhances minority class detection and leads to the best overall performance. The final model (Task 1.2.1) achieves the highest accuracy and macro F1-score, indicating improved generalization and more balanced classification behavior.