In [None]:
!pip -q install torch torchvision --extra-index-url https://download.pytorch.org/whl/cu121
!pip -q install transformers datasets accelerate timm
!pip -q install kaggle pydicom pillow opencv-python scikit-learn matplotlib seaborn grad-cam gradio


In [None]:
# Sanity check: the cell's last expression shows whether PyTorch can see a CUDA device.
import torch
torch.cuda.is_available()


In [None]:
from google.colab import files

# Upload kaggle.json through the Colab file picker.
# files.upload() returns the uploaded file contents keyed by file name. Leaving
# the call as the last expression of the cell would display that mapping, i.e.
# write the Kaggle API key into the saved notebook output. Bind the result to a
# name and show only the file names instead.
uploaded = files.upload()
print('Uploaded:', sorted(uploaded))

In [None]:
import os, shutil

# Move the uploaded credentials into /root/.kaggle and restrict their permissions.
# The move is guarded so that re-running this cell after a successful first run
# is a no-op rather than a FileNotFoundError (the source file is gone by then).
os.makedirs('/root/.kaggle', exist_ok=True)
if os.path.exists('/content/kaggle.json'):
    shutil.move('/content/kaggle.json', '/root/.kaggle/kaggle.json')
elif not os.path.exists('/root/.kaggle/kaggle.json'):
    raise FileNotFoundError(
        'kaggle.json was not found in /content or /root/.kaggle; run the upload cell first'
    )
# 0o600: readable and writable by the owner only.
os.chmod('/root/.kaggle/kaggle.json', 0o600)


In [None]:
!kaggle competitions download -c rsna-pneumonia-detection-challenge -p /content/rsna
import zipfile, glob
for z in glob.glob('/content/rsna/*.zip'):
    with zipfile.ZipFile(z, 'r') as zip_ref:
        zip_ref.extractall('/content/rsna')


In [None]:
!pip -q install pydicom

import pandas as pd, numpy as np, os, cv2, pydicom
from tqdm import tqdm

DICOM_DIR = '/content/rsna/stage_2_train_images'
LABELS_CSV = '/content/rsna/stage_2_train_labels.csv'
OUT_DIR = '/content/rsna_png'
os.makedirs(f'{OUT_DIR}/images', exist_ok=True)

labels = pd.read_csv(LABELS_CSV)
img_labels = labels.groupby('patientId')['Target'].max().reset_index()
img_labels['label'] = img_labels['Target'].astype(int)
img_labels = img_labels[['patientId','label']]

# LIMIT to a manageable subset first (adjust up later)
MAX_IMAGES = 12000  # try 4000 if still OOM; 12000 if stable
img_labels = img_labels.sample(n=min(MAX_IMAGES, len(img_labels)), random_state=42)

# Low resolution to save memory; try (160,160) or (128,128) if still OOM
RES = 160

def dicom_to_png(path, size=(RES, RES)):
    """Read a DICOM file and return it as a resized 3-channel uint8 array.

    The pixel data is min-max scaled to the 0-255 range, resized to ``size``
    with area interpolation, and the single grey channel is replicated three
    times along the last axis. Despite the name, nothing is written to disk
    here; the caller is responsible for saving the returned array.

    Args:
        path: Path of the ``.dcm`` file to read.
        size: Target size passed to ``cv2.resize``.

    Returns:
        A ``uint8`` array with three identical channels on the last axis.
    """
    pixels = pydicom.dcmread(path).pixel_array.astype(np.float32)

    # Shift to zero, then scale to [0, 1]; a constant image stays all zeros.
    pixels = pixels - pixels.min()
    peak = pixels.max()
    if peak > 0:
        pixels = pixels / peak

    gray = np.clip(pixels * 255.0, 0, 255).astype(np.uint8)
    gray = cv2.resize(gray, size, interpolation=cv2.INTER_AREA)
    return np.stack((gray, gray, gray), axis=-1)

# Convert every sampled study to a PNG and remember where each one was written.
paths, ys = [], []
n_missing = 0
for pid, y in tqdm(img_labels.values):
    p = os.path.join(DICOM_DIR, f'{pid}.dcm')
    if not os.path.exists(p):
        # Best-effort: a labelled patient without a DICOM file is skipped,
        # but counted so the skip is visible in the cell output.
        n_missing += 1
        continue
    img = dicom_to_png(p)
    outp = os.path.join(OUT_DIR, 'images', f'{pid}.png')
    # cv2.imwrite signals failure through its return value instead of raising.
    # Ignoring it would leave train_meta.csv pointing at files that do not exist.
    if not cv2.imwrite(outp, cv2.cvtColor(img, cv2.COLOR_RGB2BGR)):
        raise OSError(f'Failed to write {outp}')
    paths.append(outp)
    ys.append(int(y))

if n_missing:
    print(f'Skipped {n_missing} patients with no DICOM file in {DICOM_DIR}')

# Persist the image-path / label table for the later cells, then display the
# class balance and the number of converted images.
meta = pd.DataFrame({'image_path': paths, 'label': ys})
meta.to_csv(f'{OUT_DIR}/train_meta.csv', index=False)
meta['label'].value_counts(), len(meta)


In [None]:
from datasets import Dataset
from sklearn.model_selection import StratifiedShuffleSplit
import pandas as pd

# Reload the metadata written by the conversion cell and carve out a
# label-stratified 15% validation split with a fixed seed.
meta = pd.read_csv('/content/rsna_png/train_meta.csv')
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.15, random_state=42)
train_idx, val_idx = next(sss.split(meta['image_path'], meta['label']))
train_df, val_df = (
    meta.iloc[idx].reset_index(drop=True) for idx in (train_idx, val_idx)
)

# Wrap both frames as Hugging Face datasets and display their sizes.
ds_train, ds_val = Dataset.from_pandas(train_df), Dataset.from_pandas(val_df)
len(ds_train), len(ds_val)


In [None]:
import timm, torch
import torch.nn as nn
from PIL import Image
from torchvision import transforms

# Image transforms.
# RES here is the network input size, NOT the on-disk PNG size: the
# preprocessing cell wrote the PNGs with RES = 160, so the Resize below
# upsamples them to 224 x 224. Assigning RES here also rebinds the
# notebook-level RES that the preprocessing cell had set.
RES = 224

# Shared by both pipelines so train and validation can never drift apart.
_normalize = transforms.Normalize(mean=[0.5,0.5,0.5], std=[0.25,0.25,0.25])

# Training: resize, random horizontal flip, tensor conversion, normalisation.
train_tfms = transforms.Compose([
    transforms.Resize((RES, RES)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
    _normalize,
])
# Validation / inference: same as training but without the random flip.
val_tfms = transforms.Compose([
    transforms.Resize((RES, RES)),
    transforms.ToTensor(),
    _normalize,
])

# Lazy loader functions
def load_image(example):
    """Copy the example's ``image_path`` value into a new ``image`` field.

    No pixels are read here: ``image`` holds the same path value and the file
    is only opened later, when a batch is collated. The example is modified
    in place and also returned.
    """
    path = example['image_path']
    example['image'] = path
    return example

# Add the 'image' field to every example of both splits.
ds_train, ds_val = (split.map(load_image) for split in (ds_train, ds_val))

# Minimal collate to load+transform per batch
def collate_fn(batch, train=True):
    """Open, transform and stack the images of one batch.

    Args:
        batch: Sequence of examples, each with an ``'image'`` path and a
            ``'label'``.
        train: When true the training transforms are applied, otherwise the
            validation transforms.

    Returns:
        Dict with ``'pixel_values'`` (the stacked image tensors) and
        ``'labels'`` (a ``torch.long`` tensor with one entry per example).
    """
    tfms = train_tfms if train else val_tfms
    pixel_values = torch.stack(
        [tfms(Image.open(ex['image']).convert('RGB')) for ex in batch]
    )
    targets = torch.tensor([int(ex['label']) for ex in batch], dtype=torch.long)
    return {'pixel_values': pixel_values, 'labels': targets}

# Model
# Pretrained timm ResNet-18 with a fresh two-way classification head, created
# and moved to the GPU in one step.
num_classes = 2
model = timm.create_model('resnet18', pretrained=True, num_classes=num_classes).cuda()


In [None]:
import math, numpy as np
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_auc_score


# Training hyper-parameters.
BATCH = 16  # try 16; if OOM, set 8 or 4
ACCUM = 2   # gradient accumulation steps (effective batch = BATCH*ACCUM)
EPOCHS = 3
LR = 3e-4


# Only the training loader shuffles; each loader binds its train/eval
# transform choice through the collate function.
train_loader = DataLoader(ds_train, batch_size=BATCH, shuffle=True, collate_fn=lambda x: collate_fn(x, True), num_workers=2, pin_memory=True)
val_loader   = DataLoader(ds_val,   batch_size=BATCH, shuffle=False, collate_fn=lambda x: collate_fn(x, False), num_workers=2, pin_memory=True)


criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=1e-2)

# The training loop calls scheduler.step() once per optimizer update, i.e. once
# every ACCUM batches, so the cosine horizon has to be counted in optimizer
# updates. Counting batches (EPOCHS * len(train_loader)) makes the horizon
# ACCUM times too long, and the learning rate never finishes annealing.
# max(1, ...) keeps T_max valid even if the loader is empty.
updates_per_epoch = max(1, math.ceil(len(train_loader) / ACCUM))
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS * updates_per_epoch)


# Train for EPOCHS epochs with gradient accumulation, evaluate on the
# validation split after every epoch, and checkpoint the weights with the
# best validation ROC-AUC.
best_auc = -1
batches_per_epoch = len(train_loader)
for epoch in range(EPOCHS):
    model.train()
    optimizer.zero_grad(set_to_none=True)
    running_loss = 0.0
    for step, batch in enumerate(train_loader):
        x = batch['pixel_values'].cuda(non_blocking=True)
        y = batch['labels'].cuda(non_blocking=True)
        logits = model(x)
        # Scale so the accumulated gradient is the mean over ACCUM batches.
        loss = criterion(logits, y) / ACCUM
        loss.backward()
        # Update every ACCUM batches, and also on the final batch of the epoch:
        # otherwise the gradients of a trailing partial group would be thrown
        # away by the zero_grad at the start of the next epoch.
        if (step+1) % ACCUM == 0 or (step+1) == batches_per_epoch:
            optimizer.step()
            optimizer.zero_grad(set_to_none=True)
            scheduler.step()
        # Undo the ACCUM scaling so the logged value is the per-batch loss.
        running_loss += loss.item() * ACCUM
        if (step+1) % 100 == 0:
            print(f"epoch {epoch+1} step {step+1}/{len(train_loader)} loss {running_loss/(step+1):.4f}")


    # Eval
    model.eval()
    all_probs, all_labels = [], []
    with torch.no_grad():
        for batch in val_loader:
            x = batch['pixel_values'].cuda(non_blocking=True)
            y = batch['labels'].cuda(non_blocking=True)
            logits = model(x)
            # Probability of class index 1 for every image in the batch.
            probs = torch.softmax(logits, dim=1)[:,1].cpu().numpy()
            all_probs.append(probs)
            all_labels.append(y.cpu().numpy())
    all_probs = np.concatenate(all_probs)
    all_labels = np.concatenate(all_labels)
    preds = (all_probs >= 0.5).astype(int)
    acc = accuracy_score(all_labels, preds)
    _, rec, f1, _ = precision_recall_fscore_support(all_labels, preds, average='binary')
    try:
        auc = roc_auc_score(all_labels, all_probs)
    except ValueError:
        # Raised when AUC is undefined (e.g. only one class present in the
        # validation labels); other errors are real bugs and must propagate.
        auc = float('nan')
    print(f"E{epoch+1}: acc={acc:.3f} rec={rec:.3f} f1={f1:.3f} auc={auc:.3f}")
    # A NaN AUC never compares greater, so such an epoch is never checkpointed.
    if auc > best_auc:
        best_auc = auc
        torch.save(model.state_dict(), '/content/best_resnet18.pth')
print('Best AUC:', best_auc)

In [None]:
!pip -q install --upgrade grad-cam


In [None]:

from pytorch_grad_cam import GradCAM
from pytorch_grad_cam.utils.image import show_cam_on_image
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
import numpy as np, cv2
from PIL import Image
import torch

model.eval()

# Choose a proper conv layer for ResNet18 (timm)
target_layers = [model.layer4[-1].conv2]  # conv layer, not the whole block

# Prepare one validation image (fixed seed so the same image is picked each run).
sample_path = val_df.sample(1, random_state=0).iloc[0]['image_path']
img = Image.open(sample_path).convert('RGB').resize((RES, RES))
# Float RGB in [0, 1]: this is the background the heat-map is blended onto.
img_np = np.array(img).astype(np.float32) / 255.0

# Transform (same as val_tfms) and add a batch dimension.
x = val_tfms(img).unsqueeze(0)
x = x.cuda()  # move to GPU

# GradCAM registers hooks on the target layer. Using it as a context manager
# releases them on exit, so they do not stay attached to `model` for the
# inference cells that follow. (No use_cuda argument in new versions.)
with GradCAM(model=model, target_layers=target_layers) as cam:
    # Run CAM for the positive class (index 1 = Pneumonia)
    grayscale_cam = cam(input_tensor=x, targets=[ClassifierOutputTarget(1)])

# Convert and overlay
grayscale = grayscale_cam[0]  # first (and only) image in batch
vis = show_cam_on_image(img_np, grayscale, use_rgb=True)
# show_cam_on_image returned RGB (use_rgb=True); OpenCV writes BGR.
cv2.imwrite('/content/cam_resnet18.png', cv2.cvtColor(vis, cv2.COLOR_RGB2BGR))
'/content/cam_resnet18.png'


In [None]:
import gradio as gr

# Class names, in the order of the model's output indices.
labels = ['Normal', 'Pneumonia']
model.eval()

def predict(img):
    """Classify one image and return a probability per class name.

    Args:
        img: Image as a numpy array (the Gradio input uses ``type='numpy'``),
            or ``None`` when the form is submitted without an image.

    Returns:
        Dict mapping each entry of ``labels`` to its softmax probability, or
        ``None`` when no image was provided.
    """
    # Gradio passes None for an empty input; Image.fromarray(None) would raise.
    if img is None:
        return None
    img = Image.fromarray(img).convert('RGB')
    x = val_tfms(img).unsqueeze(0).cuda()
    with torch.no_grad():
        logits = model(x)
        probs = torch.softmax(logits, dim=1).squeeze(0).cpu().numpy()
    # Pair names with probabilities rather than hard-coding the class count.
    return {name: float(p) for name, p in zip(labels, probs)}

demo = gr.Interface(fn=predict, inputs=gr.Image(type='numpy'), outputs=gr.Label(num_top_classes=2), title='Pneumonia Detector (ResNet18)')
# share=True asks Gradio for a shareable link in addition to the local one.
demo.launch(share=True)


In [None]:
# Confirm that training left a checkpoint behind before trying to reload it.
import os
os.path.exists('/content/best_resnet18.pth')


In [None]:
import timm, torch, torch.nn as nn
from PIL import Image
from torchvision import transforms
import numpy as np

# Rebuild the architecture without pretrained weights, then restore the
# checkpoint saved during training and switch to eval mode.
num_classes = 2
model = timm.create_model('resnet18', pretrained=False, num_classes=num_classes).cuda()
state_dict = torch.load('/content/best_resnet18.pth', map_location='cuda')
model.load_state_dict(state_dict)
model.eval()

# Inference-time preprocessing: resize, tensor conversion, normalisation.
RES = 224
val_tfms = transforms.Compose([
    transforms.Resize((RES, RES)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5,0.5,0.5], std=[0.25,0.25,0.25]),
])

# Smoke test on one validation image. NOTE: val_df is not created in this
# cell; it must still be defined from the train/validation split cell.
sample_path = val_df.sample(1, random_state=0).iloc[0]['image_path']
img = Image.open(sample_path).convert('RGB')
x = val_tfms(img).unsqueeze(0).cuda()
with torch.no_grad():
    logits = model(x)
    probs = torch.softmax(logits, dim=1).squeeze(0).cpu().numpy()
p_normal, p_pneumonia = float(probs[0]), float(probs[1])
print({'Normal': p_normal, 'Pneumonia': p_pneumonia})
