# Part 2 - Data Exploration

### First, let's start by importing everything

In [1]:
from proj2.config import PROJ_ROOT, DATA_DIR, MODELS_DIR, REPORTS_DIR, FIGURES_DIR

[32m2025-04-22 16:06:29.846[0m | [1mINFO    [0m | [36mproj2.config[0m:[36m<module>[0m:[36m29[0m - [1mPROJ_ROOT path is: /Users/francescobondi/Desktop/stuff/ETH/FS25/ML for Healthcare/project-2-ml4hc[0m


---

## Load data

In [2]:
from proj2.part2.dataloader import get_data_set_loader

# Fetch the three dataset splits together with their matching loaders in one call.
(
    train_dataset,
    val_dataset,
    test_dataset,
    train_loader,
    val_loader,
    test_loader,
) = get_data_set_loader()

BASE_DIR: /Users/francescobondi/Desktop/stuff/ETH/FS25/ML for Healthcare/project-2-ml4hc/data/external/datasets/paultimothymooney/chest-xray-pneumonia/versions/2/chest_xray
Classes: ['NORMAL', 'PNEUMONIA']
Train samples: 5216
Val   samples: 16
Test  samples: 624


---

# Q1.1 - Exploration of Data

In [None]:
import matplotlib.pyplot as plt
from collections import Counter

# Count labels in the train dataset.
# Iterating over the dataset decodes and transforms every image just to read its label,
# so use the precomputed label list when the dataset exposes one.
# NOTE(review): assumes `targets` (as provided by torchvision's ImageFolder) holds the same
# labels that iteration yields -- confirm against proj2.part2.dataloader.
label_names = train_dataset.classes  # ['NORMAL', 'PNEUMONIA']
train_targets = getattr(train_dataset, "targets", None)
if train_targets is None:
    # Fallback: slow path that loads each sample, identical to the original behaviour.
    train_targets = (label for _, label in train_dataset)
# int() normalises plain ints and 0-d tensors alike so equal labels share one Counter key.
label_counts = Counter(int(label) for label in train_targets)

# Plot: one bar per class, in class-index order (not hard-coded to two classes).
fig, ax = plt.subplots()
bar_heights = [label_counts[class_idx] for class_idx in range(len(label_names))]
ax.bar(label_names, bar_heights, color=["skyblue", "salmon"])
ax.set_title("Label Distribution in Train Set")
ax.set_ylabel("Number of Samples")
plt.show()


In [None]:
def get_one_sample_per_class(dataset):
    """Return the first image encountered for each label in ``dataset``.

    Args:
        dataset: Iterable of ``(image, label)`` pairs that also exposes a
            ``classes`` attribute whose length is the number of classes.

    Returns:
        dict mapping each label seen to the first image carrying that label.
        Iteration stops as soon as every class has one entry, so classes that
        never occur are simply absent from the result.
    """
    first_image_by_label = {}

    for image, target in dataset:
        # setdefault keeps the earliest image and ignores later ones for the same label.
        first_image_by_label.setdefault(target, image)
        if len(first_image_by_label) == len(dataset.classes):
            break
    return first_image_by_label

samples = get_one_sample_per_class(train_dataset)

# Plot the images: one panel per class actually found instead of a hard-coded two.
labels_found = sorted(samples.keys())
n_panels = max(len(labels_found), 1)  # subplots() needs at least one column
# squeeze=False keeps `axes` two-dimensional even when there is a single panel.
fig, axes = plt.subplots(1, n_panels, figsize=(4 * n_panels, 4), squeeze=False)
for ax, label in zip(axes[0], labels_found):
    # Move the channel axis last, which is the layout imshow expects.
    image = samples[label].permute(1, 2, 0)
    # imshow rejects (H, W, 1) arrays; dropping a singleton channel axis lets grayscale
    # tensors render. Three-channel images are left untouched by squeeze(-1).
    image = image.squeeze(-1)
    ax.imshow(image, cmap='gray')
    ax.axis('off')
    ax.set_title(f"Label: {train_dataset.classes[label]}")

plt.suptitle("One Sample per Class")
plt.tight_layout()
plt.show()


# Q1.2 - Visual Differences

In NORMAL cases, the lungs appear clear and well defined. In PNEUMONIA cases, instead, the lungs look cloudy and are not completely visible, which may indicate fluid or an infection.

---

# Q1.3 - Potential Source of Bias

One major problem could be that the model learns to classify samples by looking at scanner-specific artifacts. In addition, the data is class-imbalanced: there are more than twice as many PNEUMONIA samples as NORMAL ones.

---

# Q1.4

The data is already transformed when loaded inside the datasets. The transformation used is:

```
 image_transform = transforms.Compose(
        [
            transforms.Resize(image_size),  # ^ Change this to the desired size
            transforms.ToTensor(),
        ]
    )
```

However, this could be further changed to include a reduction to grayscale, and a final normalization of pixel values.

---

# Q2 - CNN Classifier

## Q2.1 - Simple CNN

In [3]:
# Simple CNN Model
import torch.nn.functional as F
import torch.nn as nn
import torch

class SimpleCNN(nn.Module):
    """Small three-block CNN that outputs one logit per image (binary classification).

    Each block is Conv2d(3x3, padding=1) -> ReLU -> MaxPool2d(2), so the spatial size is
    halved three times. The flattened features go through a 256-unit hidden layer with
    dropout and a final single-output linear layer. The output is a raw logit (no sigmoid).

    The layer order inside both ``nn.Sequential`` containers is deliberately unchanged so
    the ``state_dict`` keys (``features.0``, ``features.3``, ``features.6``,
    ``classifier.1``, ``classifier.4``) stay compatible with existing checkpoints.

    Args:
        in_channels: Number of channels of the input images. Defaults to 1 (grayscale).
        image_size: Input resolution, either a single int for square images or a
            ``(height, width)`` pair. Defaults to 224, which yields 28x28 feature maps.

    Raises:
        ValueError: If the input is too small to survive three 2x poolings.
    """

    def __init__(self, in_channels=1, image_size=224):
        super().__init__()
        if isinstance(image_size, int):
            height, width = image_size, image_size
        else:
            height, width = image_size

        # Three MaxPool2d(2) layers each halve (and floor) the spatial size: overall // 8.
        pooled_h, pooled_w = height // 8, width // 8
        if pooled_h < 1 or pooled_w < 1:
            raise ValueError(
                f"image_size {image_size!r} is too small: each side must be at least 8 pixels"
            )

        self.features = nn.Sequential(
            nn.Conv2d(in_channels, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),  # 224x224 input -> 112x112

            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),  # -> 56x56

            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),  # -> 28x28
        )

        self.classifier = nn.Sequential(
            nn.Flatten(),
            # 128 * 28 * 28 for the default 224x224 input.
            nn.Linear(128 * pooled_h * pooled_w, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, 1),  # Binary classification
        )

    def forward(self, x):
        """Map a batch of images ``(N, in_channels, H, W)`` to logits of shape ``(N, 1)``."""
        x = self.features(x)
        x = self.classifier(x)
        return x

## Q2.2 - Performance of Test Set

**TODO:** It is not clear whether the model should be trained first or evaluated zero-shot.

## Train the model

In [7]:
from proj2.part2.training import train_CNN, eval_CNN

# Train the model
# Seed torch's global RNG so weight initialisation and dropout masks repeat across runs.
SEED = 42
torch.manual_seed(SEED)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SimpleCNN().to(device)
# The network emits raw logits, so the sigmoid is applied inside the loss.
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
num_epochs = 10
train_CNN(model, train_loader, val_loader, optimizer, criterion, num_epochs, device)

Checkpoint directory: /Users/francescobondi/Desktop/stuff/ETH/FS25/ML for Healthcare/project-2-ml4hc/models/cnn


Epochs:   0%|          | 0/10 [00:00<?, ?epoch/s]

 Epoch 1/10 ▶ TRAIN :   0%|          | 0/5216 [00:00<?, ?img/s]

KeyboardInterrupt: 

## Now do the evaluation

In [8]:
from proj2.config import PROJ_ROOT
CHECKPOINT_PATH = PROJ_ROOT / "models" / "cnn" / "model_final.pt"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SimpleCNN().to(device)
# weights_only=True restricts unpickling to tensors and plain containers, so a tampered
# checkpoint file cannot execute arbitrary code while being loaded.
model.load_state_dict(torch.load(CHECKPOINT_PATH, map_location=device, weights_only=True))
# Switch Dropout to inference behaviour before scoring.
# NOTE(review): eval_CNN may already call model.eval(); doing it here is harmless either way.
model.eval()
# Evaluate the model
eval_CNN(model, test_loader, device=device)

Evaluating:   0%|          | 0/20 [00:00<?, ?batch/s]


✅ Test Set Performance:
  Test Loss       = 1.8491
  AUROC           = 0.9070
  AUPRC           = 0.9230
  Accuracy        = 0.7452
  Precision       = 0.7119
  Recall          = 0.9949
  F1 Score        = 0.8299
  Confusion Matrix:
[[ 77 157]
 [  2 388]]


{'AUROC': 0.9070403243480166,
 'AUPRC': 0.9230311232303174,
 'Accuracy': 0.7451923076923077,
 'Precision': 0.7119266055045872,
 'Recall': 0.9948717948717949,
 'F1 Score': 0.8299465240641711,
 'Confusion Matrix': array([[ 77, 157],
        [  2, 388]]),
 'Test Loss': 1.8491131930165863}