In [None]:
from collections import Counter

def count_classes(dataset):
    """Tally how many samples belong to each class name.

    Args:
        dataset: Object exposing ``samples`` (an iterable of 2-item entries
            whose second item is a class index) and ``classes`` (a sequence
            indexed by that class index to obtain the class name).

    Returns:
        dict mapping class name -> number of samples carrying that class.
        Classes that never occur in ``dataset.samples`` are not included.
    """
    tally = Counter()
    for _item, class_idx in dataset.samples:
        tally[class_idx] += 1

    # Translate numeric class indices into their human-readable names.
    per_class = {}
    for class_idx, n_samples in tally.items():
        per_class[dataset.classes[class_idx]] = n_samples
    return per_class

# Report the per-class sample counts for every split, one line per split.
for split_name, split_data in (
    ("Train", train_data),
    ("Validation", val_data),
    ("Test", test_data),
):
    print(f"{split_name}:", count_classes(split_data))

## ⚖️ Dataset Balance
Let's check how many images are in each class (Normal vs Pneumonia) for the train, validation, and test splits.

In [None]:
# Pull a single batch from the (shuffled) training loader to preview.
images, labels = next(iter(train_loader))

# Preview grid. The batch may contain fewer images than the grid has cells
# (e.g. a small batch_size), so images are paired with axes via zip instead of
# being indexed by grid position, which would raise IndexError.
n_rows, n_cols = 2, 3
fig, axes = plt.subplots(n_rows, n_cols, figsize=(10, 7))
flat_axes = axes.flatten()

for ax, image, label in zip(flat_axes, images, labels):
    # Move the channel axis last, which is the layout imshow expects.
    img = image.permute(1, 2, 0).numpy()
    img = (img * 0.5) + 0.5  # unnormalize: inverse of Normalize([0.5], [0.5])
    ax.imshow(img, cmap="gray")
    # int() converts the 0-d label tensor into a plain Python list index.
    ax.set_title(f"Label: {train_data.classes[int(label)]}")
    ax.axis("off")

# Blank out any grid cells that did not receive an image.
for ax in flat_axes[len(images):]:
    ax.axis("off")

plt.tight_layout()
plt.show()

## 👀 Visualize Sample Images
Let's display some training images with their labels.

In [None]:
batch_size = 32

# Only the training split is shuffled; the evaluation splits keep a fixed order.
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader, test_loader = (
    DataLoader(eval_split, batch_size=batch_size, shuffle=False)
    for eval_split in (val_data, test_data)
)

## 📦 DataLoaders
We create loaders for training, validation, and testing.

In [None]:
# The training split uses the augmenting pipeline; the held-out splits share
# the deterministic one.
train_data = datasets.ImageFolder(train_dir, transform=train_transforms)
val_data, test_data = (
    datasets.ImageFolder(split_dir, transform=test_transforms)
    for split_dir in (val_dir, test_dir)
)

# Size of each split, followed by the class names found in the training split.
for split_label, split_set in (
    ("Train", train_data),
    ("Validation", val_data),
    ("Test", test_data),
):
    print(f"{split_label} samples:", len(split_set))
print("Classes:", train_data.classes)

## 📥 Load Datasets
We load images using `ImageFolder`. Each subfolder (NORMAL / PNEUMONIA) 
is treated as a class.

In [None]:
# Deterministic pipeline for the held-out splits: resize, tensorize, normalize.
test_transforms = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5], std=[0.5]),
    ]
)

# Training pipeline: the same steps plus random flip/rotation augmentation,
# applied before the image is converted to a tensor.
train_transforms = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(degrees=10),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5], std=[0.5]),
    ]
)

## 🛠️ Data Preprocessing & Augmentation
We resize all images to `224x224` and normalize them. For the training set only, 
we also apply random horizontal flips and small rotations (up to ±10°) to make the model more robust.

In [None]:
data_dir = "data/chest_xray"

# Each split lives in its own sub-folder directly under the dataset root.
train_dir, val_dir, test_dir = (
    os.path.join(data_dir, split_folder)
    for split_folder in ("train", "val", "test")
)

for heading, folder in (
    ("Train", train_dir),
    ("Validation", val_dir),
    ("Test", test_dir),
):
    print(f"{heading} folder:", folder)

## 📂 Dataset Paths
We set up paths to the dataset folders (`train/`, `val/`, `test/`).


In [None]:
import os
import matplotlib.pyplot as plt
import numpy as np
from torchvision import datasets, transforms
from torch.utils.data import DataLoader


# 🩺 Pneumonia Detection – Data Exploration

In this notebook, we will:

- Load the chest X-ray dataset (train/val/test).
- Apply preprocessing & augmentation.
- Visualize some sample images.
- Check dataset balance (Normal vs Pneumonia).

This is **Milestone 2** of the project.
