# Chest X-Ray Pneumonia Detection - Training on Kaggle

**Task**: Binary classification (NORMAL vs PNEUMONIA)

**Before running, make sure you have:**
1. GPU enabled: Settings -> Accelerator -> GPU P100 or T4
2. Added dataset: + Add Input -> Search `chest-xray-pneumonia` by Paul Mooney
3. Added your code: + Add Input -> Upload `project_for_colab.zip`

In [None]:
# Cell 1: Check GPU and find datasets
import torch
import os

print("=" * 50)
print("GPU CHECK")
print("=" * 50)
!nvidia-smi -L 2>/dev/null || echo "No GPU found"
print(f"PyTorch: {torch.__version__}")
print(f"CUDA: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")

print("\n" + "=" * 50)
print("DATASET SEARCH")
print("=" * 50)

# Show what kaggle has in /kaggle/input
print("\nAll files in /kaggle/input:")
!find /kaggle/input -maxdepth 5 -type f | head -30

print("\nAll directories in /kaggle/input:")
!find /kaggle/input -maxdepth 5 -type d | head -30

# Search for the uploaded project and the chest X-ray dataset
def find_kaggle_inputs(search_root):
    """Locate the project upload and the chest X-ray dataset below *search_root*.

    The tree is walked top-down in sorted order so the outcome is the same on
    every run, and the first match of each kind is kept.

    Args:
        search_root: Directory to scan (``/kaggle/input`` in this notebook).

    Returns:
        A ``(project_path, dataset_path)`` tuple. Either element is ``None``
        when nothing matched.

        * ``project_path`` is the first ``.zip`` file found; if there is no
          zip at all, it is the first directory that contains both
          ``train.py`` and ``config.py``.
        * ``dataset_path`` is the first directory that has ``train`` and
          ``test`` sub-directories where ``train`` contains both ``NORMAL``
          and ``PNEUMONIA``.
    """
    first_zip = None
    first_project_dir = None
    dataset_root = None

    for root, dirs, files in os.walk(search_root):
        # Sorting in place fixes the order in which os.walk descends.
        dirs.sort()

        if first_zip is None:
            zip_names = sorted(name for name in files if name.endswith('.zip'))
            if zip_names:
                first_zip = os.path.join(root, zip_names[0])

        if first_project_dir is None and 'train.py' in files and 'config.py' in files:
            first_project_dir = root

        if dataset_root is None and 'train' in dirs and 'test' in dirs:
            train_contents = os.listdir(os.path.join(root, 'train'))
            if 'NORMAL' in train_contents and 'PNEUMONIA' in train_contents:
                dataset_root = root

    # A zip archive takes priority over an already-extracted project folder.
    return (first_zip or first_project_dir), dataset_root


project_zip_path, chest_xray_data_path = find_kaggle_inputs('/kaggle/input')

# Summary of what the search located, followed by either the go-ahead or the
# "add input" instruction for each item that is still missing.
print("\n" + "=" * 50)
print("Project zip:     {}".format(project_zip_path or 'NOT FOUND'))
print("Chest xray data: {}".format(chest_xray_data_path or 'NOT FOUND'))
print("=" * 50)

if project_zip_path and chest_xray_data_path:
    print("\n[OK] Everything found! Continue to Cell 2.")
else:
    if not project_zip_path:
        print("\n>>> ADD YOUR PROJECT: + Add Input -> Upload project_for_colab.zip")
    if not chest_xray_data_path:
        print("\n>>> ADD DATASET: + Add Input -> Search 'chest-xray-pneumonia'")

In [None]:
# Cell 2: Extract project and write hyperparameters
import os, json, shutil

project_dir = '/kaggle/working/medical-image-classification'

# Validate the input first so a missing project never wipes an existing
# working copy.
if project_zip_path is None:
    raise Exception("No project found! Go back to Cell 1 and add your project dataset.")

# Remove any previous copy: the extraction should start clean, and
# shutil.copytree refuses to write into an existing directory by default.
if os.path.exists(project_dir):
    shutil.rmtree(project_dir)

if project_zip_path.endswith('.zip'):
    os.makedirs(project_dir, exist_ok=True)
    # unpack_archive raises on a missing or corrupt archive, so the success
    # message below is only printed when extraction really worked. It also
    # avoids building a shell command from the path.
    shutil.unpack_archive(project_zip_path, project_dir, 'zip')
    print(f"[OK] Extracted: {project_zip_path}")
else:
    # The search found an already-extracted project folder; copy it as-is.
    shutil.copytree(project_zip_path, project_dir)
    print(f"[OK] Copied: {project_zip_path}")

%cd {project_dir}

# Hyperparameters for the chest X-ray run, saved as JSON under results/phase1.
# NOTE(review): presumably read by `train.py --use_optimized` — confirm.
class_names = ["NORMAL", "PNEUMONIA"]
params = {
    "best_hyperparameters": {"learning_rate": 0.001, "batch_size": 32, "epochs": 50},
    "optimization_summary": {
        "method": "transferred_from_brain_tumor",
        "dataset": "chest_xray",
        "classes": len(class_names),
    },
    "dataset_info": {
        "name": "Chest X-Ray Pneumonia Detection",
        "classes": class_names,
        "num_classes": len(class_names),
    },
}

# Make sure the output folder exists, then serialise with 2-space indentation.
os.makedirs('results/phase1', exist_ok=True)
with open('results/phase1/best_hyperparameters.json', 'w') as out:
    out.write(json.dumps(params, indent=2))

print("[OK] Hyperparameters written!")
!ls

In [None]:
# Cell 3: Install dependencies
# Quietly installs the two extra packages this notebook asks for.
# NOTE(review): `-q` together with `2>/dev/null` discards pip's error output,
# so a failed install is not visible here — confirm that is intended.
!pip install -q albumentations scikit-image 2>/dev/null
# Import check for the core libraries; an ImportError here stops the cell.
# The two packages installed above are not imported by this check.
import torch, torchvision, sklearn, PIL
print(f"PyTorch: {torch.__version__}, CUDA: {torch.cuda.is_available()}")
print("[OK] Dependencies ready!")

In [None]:
# Cell 4: Link chest xray dataset
import os
from pathlib import Path

import shutil  # needed below to clear a real directory sitting at a link path

if chest_xray_data_path is None:
    raise Exception("No chest xray dataset found! Go back to Cell 1 and add it.")

print(f"Dataset source: {chest_xray_data_path}")
os.makedirs('data/chest_xray', exist_ok=True)

# Expose each dataset split inside the project tree as a symlink to the
# dataset found by the search in Cell 1.
for split in ['train', 'val', 'test']:
    link = f'data/chest_xray/{split}'
    source = os.path.join(chest_xray_data_path, split)

    # Clear whatever currently occupies the link path. islink is tested first
    # so that a dangling link or a link to a directory is unlinked rather than
    # followed; a real directory (e.g. one shipped inside the project archive)
    # must be removed with rmtree because os.remove cannot delete directories.
    if os.path.islink(link):
        os.unlink(link)
    elif os.path.isdir(link):
        shutil.rmtree(link)
    elif os.path.exists(link):
        os.remove(link)

    if os.path.exists(source):
        os.symlink(source, link)
        print(f"  {split}: linked")
    else:
        print(f"  {split}: not found (skipping)")

def count_imgs(d):
    """Count image files anywhere below directory *d*.

    A file counts as an image when its extension is ``.jpeg``, ``.jpg`` or
    ``.png`` in any letter case. The tree is traversed once, and only regular
    files are counted (a directory whose name ends in an image extension is
    ignored).

    Args:
        d: Directory to scan, as a ``str`` or ``Path``.

    Returns:
        The number of matching files as an ``int``.
    """
    image_suffixes = {'.jpeg', '.jpg', '.png'}
    return sum(
        1
        for entry in Path(d).rglob('*')
        if entry.suffix.lower() in image_suffixes and entry.is_file()
    )

# Two report sections share one shape: a heading, then one "name: count"
# line for every listed sub-folder that exists under the section's base path.
report_sections = (
    ("\nImage counts:", 'data/chest_xray', ['train', 'val', 'test']),
    ("\nClasses in train:", 'data/chest_xray/train', ['NORMAL', 'PNEUMONIA']),
)
for heading, base, names in report_sections:
    print(heading)
    for name in names:
        folder = f'{base}/{name}'
        if not os.path.exists(folder):
            continue  # absent folders are left out of the report
        print(f"  {name}: {count_imgs(folder)}")

print("\n[OK] Dataset ready!")

In [None]:
# Cell 5: Verify config
# Smoke checks run in separate Python processes from the current directory.
# First: load the 'chest_xray' config and print its dataset name and class count.
!python -c "from config import get_config; c = get_config('chest_xray'); print('Config:', c['dataset']['name'], '| Classes:', c['dataset']['num_classes'])"
# Second: confirm the dataset loader class can be imported.
!python -c "from src.datasets.chest_xray import ChestXRayDataset; print('Dataset loader: OK')"
# Show the hyperparameter file written in Cell 2.
print("\nHyperparameters:")
!cat results/phase1/best_hyperparameters.json
# NOTE(review): this message is printed unconditionally; a failure in the
# commands above only shows up as a traceback in their output.
print("\n[OK] Ready to train!")

In [None]:
# Cell 6: TRAIN!
# NOTE(review): the banner text (model name, epoch count) is a fixed string and
# is not derived from the config or the hyperparameter file — keep it in sync.
print("=" * 60)
print("  TRAINING: Chest X-Ray | ResNet-18 | 50 epochs")
print("=" * 60 + "\n")

# Launch the project's training script on the GPU for the chest_xray dataset.
# NOTE(review): `--use_optimized` presumably makes train.py read
# results/phase1/best_hyperparameters.json — confirm in train.py.
!python train.py --dataset chest_xray --use_optimized --device cuda

# NOTE(review): printed regardless of how the training command exited; check
# the output above for errors before trusting this banner.
print("\n" + "=" * 60)
print("  TRAINING COMPLETE!")
print("=" * 60)

In [None]:
# Cell 7: Results
import json, os

!ls -lh models/checkpoints/

if os.path.exists('results/phase1/training_history.json'):
    with open('results/phase1/training_history.json') as f:
        h = json.load(f)
    print(f"\nTrain Acc:  {h['train_acc'][-1]:.2f}%")
    print(f"Val Acc:    {h['val_acc'][-1]:.2f}%")
    print(f"Epochs:     {len(h['train_acc'])}")
    print(f"Best Val:   {max(h['val_acc']):.2f}%")

In [None]:
# Cell 8: Save for download
import shutil

for f in ['models/checkpoints/best_model.pth', 'results/phase1/training_history.json', 'results/training_results_chest_xray.json']:
    if os.path.exists(f):
        shutil.copy2(f, '/kaggle/working/')
        print(f"[OK] {os.path.basename(f)}")

print("\nFiles ready:")
!ls -lh /kaggle/working/*.pth /kaggle/working/*.json 2>/dev/null
print("\nClick 'Save Version' -> 'Save & Run All' -> then download from Output tab")