<a href="https://colab.research.google.com/github/AnkitSingh10-hub/XrayClassification/blob/main/pneumonia_detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os

# 1. Set the API Token directly (From your screenshot)
# This authenticates you without needing to upload the kaggle.json file
os.environ['KAGGLE_API_TOKEN'] = 'KGAT_b5bae63bd58fdd2f74964cade51c4ad7'
os.environ['KAGGLE_USERNAME'] = "ankitsingh388" # I see your username in the background

# 2. Update the Kaggle library to ensure it supports this token type
!pip install -q -U kaggle

# 3. Download the Chest X-Ray Dataset
print("Downloading dataset...")
!kaggle datasets download -d paultimothymooney/chest-xray-pneumonia

# 4. Unzip the data
print("Unzipping data... (this might take a minute)")
!unzip -q chest-xray-pneumonia.zip

print("DONE! Data is ready in folder: /content/chest_xray")

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.2/85.2 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m256.4/256.4 kB[0m [31m24.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m44.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.6/13.6 MB[0m [31m76.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m159.3/159.3 kB[0m [31m13.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m88.8/88.8 kB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dataset...
Dataset URL: https://www.kaggle.com/datasets/paultimothymooney/chest-xray-pneumonia
License(s): other
Downloading chest-xray-pneumonia.zip to /content
100% 2.29G/2.29G [01:45<00:00, 25.0MB/s]
100% 2.29G/2.29G [01:45<00:00, 23.3MB/s]
Unzipping data... (thi

In [None]:
# UPDATE THIS PATH IN YOUR CODE
# In VS Code it was likely just 'chest_xray', but in Colab it is here:
data_dir = '/content/chest_xray/chest_xray'

# Example of how your loader setup should look now:
train_dir = os.path.join(data_dir, 'train')
test_dir = os.path.join(data_dir, 'test')
val_dir = os.path.join(data_dir, 'val')

# Verify it works: list only the class sub-directories of the train split.
# A raw os.listdir() also returns stray files (e.g. macOS '.DS_Store') and has
# no guaranteed order, so filter to directories and sort.
print(sorted(
    entry for entry in os.listdir(train_dir)
    if os.path.isdir(os.path.join(train_dir, entry))
))
# Should print ['NORMAL', 'PNEUMONIA']

['.DS_Store', 'NORMAL', 'PNEUMONIA']


In [None]:
# ==============================================================================
# PROJECT: Deep Learning for Medical Imaging (Pneumonia Detection)
# RESUME MATCH: PyTorch, ResNet-18, Weighted Loss, Geometric Augmentation
# ==============================================================================

# 1. Imports and Setup
# --------------------------------------------
import os
import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

# Metrics
try:
    from torchmetrics import Accuracy, F1Score, Recall
except ImportError:
    import subprocess
    import sys
    print("Installing torchmetrics...")
    subprocess.check_call([sys.executable, "-m", "pip", "install", "torchmetrics"])
    from torchmetrics import Accuracy, F1Score, Recall

# Pick the compute device: CUDA when torch reports it as available, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

# Seed every random number generator used in this notebook (torch, numpy,
# and Python's own `random`) with one shared value for reproducibility.
SEED = 101010
torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

# 2. Data Preparation
# --------------------------------------------

# ===> FIX: CHANGED PATH FOR COLAB <===
data_dir = '/content/chest_xray/chest_xray'

# One sub-folder per split, all living directly under data_dir.
train_dir, test_dir, val_dir = (
    os.path.join(data_dir, split_name) for split_name in ('train', 'test', 'val')
)

# Report whether the training folder is present. This only prints a message;
# it does not stop execution when the folder is missing.
print(f"Checking path: {train_dir}")
if os.path.exists(train_dir):
    print("Train folder found!")
else:
    print("ERROR: Train folder still not found. Please run the Kaggle download cell first.")

# RESUME MATCH: "Dynamic data augmentation (geometric transformations)"
# Training pipeline: resize, then random geometric augmentation (flip, small
# rotation, small translation), then tensor conversion and normalization.
# NOTE(review): the mean/std values are presumably the ImageNet statistics
# expected by the pretrained backbone - confirm.
train_transform = transforms.Compose(
    [transforms.Resize((224, 224))]
    + [
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomRotation(degrees=15),
        transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
    ]
    + [
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# Evaluation pipeline: same resize and normalization, but no random augmentation.
test_transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# Build the datasets and their loaders. Any failure is reported and both
# datasets fall back to empty lists, which the later `len(...) > 0` guards
# use to skip training and evaluation.
try:
    print(f"Loading data from: {data_dir}")
    train_dataset = ImageFolder(root=train_dir, transform=train_transform)
    test_dataset = ImageFolder(root=test_dir, transform=test_transform)

    # Two worker processes per loader; only the training loader shuffles.
    train_loader = DataLoader(
        dataset=train_dataset,
        batch_size=32,
        shuffle=True,
        num_workers=2,
    )
    test_loader = DataLoader(
        dataset=test_dataset,
        batch_size=32,
        shuffle=False,
        num_workers=2,
    )
    print("Data loaded successfully.")
except Exception as e:
    print(f"Error loading data: {e}")
    train_dataset = []
    test_dataset = []

# 3. Model Architecture
# --------------------------------------------
# RESUME MATCH: "Transfer Learning (ResNet-18)"

# Start from torchvision's ResNet-18 with its default pretrained weights.
resnet18 = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)

# Feature extraction: turn off gradients for every existing parameter.
resnet18.requires_grad_(False)

# Swap the classification head for a single-logit layer (binary output).
# The new layer is created after the freeze, so it remains trainable.
head_in_features = resnet18.fc.in_features
resnet18.fc = nn.Linear(head_in_features, 1)
resnet18 = resnet18.to(device)

# 4. Methodology: Weighted Loss Strategy
# --------------------------------------------
# RESUME MATCH: "Weighted Cross-Entropy Loss to penalize false negatives"

def get_pos_weight(dataset, target_device=None):
    """Compute the `pos_weight` tensor for `nn.BCEWithLogitsLoss`.

    The weight is (number of class-0 samples) / (number of class-1 samples),
    which balances the total contribution of the two classes to the loss.
    Note that when class 1 is the majority the weight is below 1, i.e. the
    positive class is down-weighted rather than emphasized.

    Args:
        dataset: Object supporting `len()` and exposing a `targets` sequence
            of integer class labels (0 = Normal, 1 = Pneumonia).
        target_device: Device for the returned tensor. When omitted, the
            module-level `device` is used, as before.

    Returns:
        A float32 tensor of shape `[1]`. A neutral weight of 1.0 is returned
        when the dataset is empty or when either class has no samples.
    """
    out_device = device if target_device is None else target_device
    neutral = torch.tensor([1.0], dtype=torch.float32, device=out_device)

    if len(dataset) == 0:
        return neutral

    # Calculate class distribution as plain Python ints.
    targets = torch.as_tensor(dataset.targets)
    class_0_count = int((targets == 0).sum())  # Normal
    class_1_count = int((targets == 1).sum())  # Pneumonia

    print(f"Class Distribution -> Normal: {class_0_count}, Pneumonia: {class_1_count}")

    # No positives would divide by zero; no negatives would give weight 0 and
    # zero out the loss of every positive sample. Use the neutral weight instead.
    if class_0_count == 0 or class_1_count == 0:
        return neutral

    # Weight = Number of Negatives / Number of Positives
    weight = class_0_count / class_1_count
    return torch.tensor([weight], dtype=torch.float32, device=out_device)

# Derive the positive-class weight from the training data when there is any;
# otherwise use a neutral weight of 1.0.
pos_weight = (
    get_pos_weight(train_dataset)
    if len(train_dataset) > 0
    else torch.tensor([1.0]).to(device)
)

# Binary cross-entropy on raw logits, weighted for the positive class.
criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
# Only the replaced final layer's parameters are handed to the optimizer.
optimizer = optim.Adam(params=resnet18.fc.parameters(), lr=0.001)

# 5. Training Loop
# --------------------------------------------

def train_model(model, loader, criterion, optimizer, epochs=5):
    """Train `model` on `loader` and return the mean loss of each epoch.

    Args:
        model: The network to train. Batches are moved to the device of its
            first parameter (CPU if it has no parameters).
        loader: Iterable of `(inputs, labels)` batches that also exposes a
            `dataset` attribute supporting `len()`.
        criterion: Loss called as `criterion(outputs, labels)` where labels
            are float tensors of shape `[batch, 1]`.
        optimizer: Optimizer stepped once per batch.
        epochs: Number of passes over `loader`.

    Returns:
        A list with one float per epoch: the sample-weighted mean training
        loss. The list is empty when the dataset has no samples.
    """
    # Use the device the model actually lives on instead of a global, so
    # inputs and weights can never end up on different devices.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device('cpu')

    print("\nStarting Training...")
    num_samples = len(loader.dataset)
    if num_samples == 0:
        # Averaging over zero samples would divide by zero at the end of the epoch.
        print("No training samples found; skipping training.")
        return []

    print(f"Training for {epochs} epochs on {run_device}...")
    epoch_losses = []
    for epoch in range(epochs):
        # NOTE(review): train() also switches normalization layers of a frozen
        # backbone to training mode - confirm that is intended.
        model.train()
        running_loss = 0.0

        for i, (inputs, labels) in enumerate(loader):
            inputs = inputs.to(run_device)
            # Match the model output shape [Batch, 1] and the float dtype the loss expects.
            labels = labels.float().unsqueeze(1).to(run_device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # loss is a batch mean; weight by batch size so the epoch average
            # stays correct when the last batch is smaller.
            running_loss += loss.item() * inputs.size(0)

            if i % 20 == 0:
                print(f"  Epoch {epoch+1}, Batch {i}/{len(loader)}")

        epoch_loss = running_loss / num_samples
        epoch_losses.append(epoch_loss)
        print(f"Epoch [{epoch+1}/{epochs}] Loss: {epoch_loss:.4f}")

    return epoch_losses

# Run training only when the training set actually contains samples
# (it is an empty list when loading failed).
if len(train_dataset) != 0:
    train_model(
        resnet18,
        train_loader,
        criterion,
        optimizer,
        epochs=5,
    )

# 6. Evaluation
# --------------------------------------------
# RESUME MATCH: "Result: Achieved a measurable improvement in Recall (Sensitivity)"

def evaluate_model(model, loader):
    """Evaluate a single-logit binary classifier and report its metrics.

    Predictions are obtained by applying a sigmoid to the model output and
    rounding to 0/1. Accuracy, F1 and recall are accumulated over every batch
    of `loader`, printed, and returned.

    Args:
        model: Network producing one logit per sample (shape `[batch, 1]`).
            It is switched to eval mode and left in that mode.
        loader: Iterable of `(inputs, labels)` batches with integer labels.

    Returns:
        A dict with float entries `"accuracy"`, `"f1"` and `"recall"`.
    """
    model.eval()

    # Use the device the model actually lives on instead of a global, so
    # inputs, metrics and weights always share a device.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device('cpu')

    # Initialize Metrics
    accuracy_metric = Accuracy(task="binary").to(run_device)
    f1_metric = F1Score(task="binary").to(run_device)
    recall_metric = Recall(task="binary").to(run_device)  # Sensitivity
    all_metrics = (accuracy_metric, f1_metric, recall_metric)

    print("\nEvaluating on Test Set...")
    with torch.no_grad():
        for inputs, labels in loader:
            inputs = inputs.to(run_device)
            # Reshape labels to [Batch, 1] so they line up with the predictions.
            targets = labels.to(run_device).unsqueeze(1)

            outputs = model(inputs)
            preds = torch.sigmoid(outputs).round()

            for metric in all_metrics:
                metric.update(preds, targets)

    final_acc = accuracy_metric.compute().item()
    final_f1 = f1_metric.compute().item()
    final_recall = recall_metric.compute().item()

    print("\n=== Final Evaluation ===")
    print(f"Accuracy: {final_acc:.4f}")
    print(f"F1 Score: {final_f1:.4f}")
    print(f"Recall (Sensitivity): {final_recall:.4f} <--- Key Resume Metric")

    # Hand the numbers back so later cells can log or compare them.
    return {"accuracy": final_acc, "f1": final_f1, "recall": final_recall}

# Evaluate only when the test set actually contains samples
# (it is an empty list when loading failed).
if len(test_dataset) != 0:
    evaluate_model(model=resnet18, loader=test_loader)

Using device: cuda
Checking path: /content/chest_xray/chest_xray/train
Train folder found!
Loading data from: /content/chest_xray/chest_xray
Data loaded successfully.
Class Distribution -> Normal: 1341, Pneumonia: 3875

Starting Training...
Training for 5 epochs on cuda...
  Epoch 1, Batch 0/163
  Epoch 1, Batch 20/163
  Epoch 1, Batch 40/163
  Epoch 1, Batch 60/163
  Epoch 1, Batch 80/163
  Epoch 1, Batch 100/163
  Epoch 1, Batch 120/163
  Epoch 1, Batch 140/163
  Epoch 1, Batch 160/163
Epoch [1/5] Loss: 0.2116
  Epoch 2, Batch 0/163
  Epoch 2, Batch 20/163
  Epoch 2, Batch 40/163
  Epoch 2, Batch 60/163
  Epoch 2, Batch 80/163
  Epoch 2, Batch 100/163
  Epoch 2, Batch 120/163
  Epoch 2, Batch 140/163
  Epoch 2, Batch 160/163
Epoch [2/5] Loss: 0.1333
  Epoch 3, Batch 0/163
  Epoch 3, Batch 20/163
  Epoch 3, Batch 40/163
  Epoch 3, Batch 60/163
  Epoch 3, Batch 80/163
  Epoch 3, Batch 100/163
  Epoch 3, Batch 120/163
  Epoch 3, Batch 140/163
  Epoch 3, Batch 160/163
Epoch [3/5] Loss: 0

In [None]:
# 1. Save the model weights (state_dict only) to the local disk
MODEL_PATH = 'pneumonia_resnet18.pth'
torch.save(resnet18.state_dict(), MODEL_PATH)
print("Model saved successfully!")

# 2. Download it to your computer (Optional but recommended)
# google.colab only exists inside Colab; guard the import so this cell still
# succeeds elsewhere, leaving the saved file on disk.
try:
    from google.colab import files
except ImportError:
    print(f"Not running in Colab; model left on disk at {MODEL_PATH}")
else:
    files.download(MODEL_PATH)

Model saved successfully!


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>